In [16]:
import scanpy as sc 
import numpy as np  
import pickle as pkl
import gzip
import anndata
from pathlib import Path
import sys
from tqdm import tqdm
import pickle
#sys.path.insert(1, str('ondemand/data/sys/lscm/bin/Geneformer'))
import anndata as ad
import scipy.sparse as sp
import pandas as pd
## geneformer imports
from geneformer import TranscriptomeTokenizer, EmbExtractor, InSilicoPerturber, InSilicoPerturberStats

In [2]:
## Define paths
# The root data directory is defined once; every other location is derived from
# it so that moving the project only requires editing `d_path`.

d_path = '/data/scratch/bty416/scFMs/data'
raw_expr_path = f'{d_path}/raw_expression_counts'

### Norman dataset

#### Control data

In [5]:
## Load the Norman control AnnData and its mask, then report both shapes

norman_ctrl_adata = sc.read_h5ad(f'{raw_expr_path}/norman/ctrl_norman_raw_counts.h5ad')

# The mask is stored as a pickle next to the raw-count folders.
with open(f'{raw_expr_path}/norman_mask_df.pkl', 'rb') as mask_file:
    norman_cl_mask = pkl.load(mask_file)

print(f'Mask shape: {norman_cl_mask.shape}, adata shape: {norman_ctrl_adata.shape}')

Mask shape: (11855, 102), adata shape: (11855, 21265)


In [12]:
def gf_preprocess(data_directory, output_directory):
    """Prepare a raw-count AnnData file for the Geneformer tokenizer and save it.

    The Geneformer tokenizer expects gene identifiers in ``var['ensembl_id']``
    and each cell's total counts in ``obs['n_counts']``; this function makes
    sure both columns exist before writing the file.

    Parameters
    ----------
    data_directory : str or Path
        Path of the input ``.h5ad`` file (a file path, despite the name).
    output_directory : str or Path
        Path of the ``.h5ad`` file to write (also a file path).

    Returns
    -------
    The AnnData object that was written to ``output_directory``.
    """
    ctrl_adata = sc.read_h5ad(data_directory)

    # Gene identifiers: fall back to the var index when no dedicated column exists.
    # NOTE(review): this assumes var_names hold Ensembl IDs -- confirm per dataset.
    if 'ensembl_id' not in ctrl_adata.var:
        ctrl_adata.var['ensembl_id'] = ctrl_adata.var_names

    # Kept from the original behaviour: a gene-level 'ncounts' column is renamed.
    if 'ncounts' in ctrl_adata.var:
        ctrl_adata.var.rename(columns={'ncounts': 'n_counts'}, inplace=True)

    # Per-cell totals must live in obs. Previously obs was only touched when var
    # had no 'ncounts' column, and an existing 'n_counts' could be duplicated.
    if 'n_counts' not in ctrl_adata.obs:
        for candidate in ('read_count', 'ncounts'):
            if candidate in ctrl_adata.obs:
                ctrl_adata.obs.rename(columns={candidate: 'n_counts'}, inplace=True)
                break
        else:
            # No usable column: derive the totals from the count matrix itself.
            # np.asarray flattens the (n, 1) matrix that sparse .sum() returns.
            ctrl_adata.obs['n_counts'] = np.asarray(ctrl_adata.X.sum(axis=1)).ravel()

    # Make sure the destination folder exists before writing.
    Path(output_directory).parent.mkdir(parents=True, exist_ok=True)
    ctrl_adata.write_h5ad(output_directory)

    return ctrl_adata

In [10]:
# Preprocess the Norman control counts and store the tokenizer-ready copy.
norman_ctrl_adata = gf_preprocess(
    f'{raw_expr_path}/norman/ctrl_norman_raw_counts.h5ad',
    f'{raw_expr_path}/tokenizer_input/ctrl_norman_raw_counts.h5ad',
)

In [13]:
def gf_tokenizer(data_directory: str, output_path: str, nproc: int = 16):
    """Tokenize one ``.h5ad`` file with Geneformer and save the resulting dataset.

    Parameters
    ----------
    data_directory : str
        Path of the ``.h5ad`` file to tokenize (a file path, despite the name).
    output_path : str
        Location passed to ``save_to_disk`` for the tokenized dataset.
    nproc : int, default 16
        Number of processes handed to ``TranscriptomeTokenizer``. The default
        keeps the previously hard-coded value.

    Returns
    -------
    The dataset object produced by ``TranscriptomeTokenizer.create_dataset``.
    """
    tk = TranscriptomeTokenizer(nproc=nproc)

    # Tokenize the single file directly instead of scanning a whole directory.
    tokenized_cells, cell_metadata = tk.tokenize_anndata(str(data_directory))

    tokenized_dataset = tk.create_dataset(tokenized_cells, cell_metadata)

    tokenized_dataset.save_to_disk(output_path)

    print(f'Saved tokenized_dataset to {output_path}')

    return tokenized_dataset

In [40]:
# Folder that receives tokenized datasets, and the name used for this one.
norman_prefix = "norman"
output_directory = Path(f'{d_path}/tokenized_data/').resolve()

# Tokenizer input: the preprocessed control counts.
norman_directory = str(Path(f'{raw_expr_path}/tokenizer_input/ctrl_norman_raw_counts.h5ad').resolve())

# <output_directory>/norman.dataset
norman_output_path = str((output_directory / norman_prefix).with_suffix(".dataset"))

norman_token = gf_tokenizer(data_directory=norman_directory, output_path=norman_output_path)

  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


/data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/ctrl_norman_raw_counts.h5ad has no column attribute 'filter_pass'; tokenizing all cells.
Creating dataset.


Map (num_proc=16): 100%|██████████| 11855/11855 [00:29<00:00, 400.47 examples/s]
Saving the dataset (1/1 shards): 100%|██████████| 11855/11855 [00:00<00:00, 202354.15 examples/s]

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/norman.dataset





In [14]:
def gf_extract_ctrl_embs(emb_mode: str = 'cell', batch_size: int = 64, dataset: str = 'norman',
                         model_directory: str = "../bin/Geneformer", nproc: int = 16,
                         max_ncells: int = 20000):
    """Extract Geneformer embeddings for a tokenized control dataset and pickle them.

    Reads ``{d_path}/tokenized_data/{dataset}.dataset`` and writes results under
    ``{d_path}/embeddings/`` (``d_path`` is the notebook-level data root).

    Parameters
    ----------
    emb_mode : str, default 'cell'
        Forwarded to ``EmbExtractor`` and used in the output file names.
    batch_size : int, default 64
        Forwarded to ``EmbExtractor`` as ``forward_batch_size``.
    dataset : str, default 'norman'
        Name of the tokenized dataset (without the ``.dataset`` extension).
    model_directory : str, default "../bin/Geneformer"
        Model location passed to ``extract_embs``. The default is relative to
        the notebook's working directory, as before.
    nproc : int, default 16
        Number of processes forwarded to ``EmbExtractor``.
    max_ncells : int, default 20000
        Forwarded to ``EmbExtractor``.
        NOTE(review): presumably caps how many cells are embedded -- confirm that
        datasets with more cells are not silently subsampled.

    Returns
    -------
    The object returned by ``EmbExtractor.extract_embs`` (it must support
    ``.to_pickle``; presumably a pandas DataFrame).
    """
    output_directory = f"{d_path}/embeddings/"
    # Both the extractor output and the pickle below are written here.
    Path(output_directory).mkdir(parents=True, exist_ok=True)

    # initiate EmbExtractor
    embex = EmbExtractor(model_type="Pretrained",
                         num_classes=0,
                         emb_mode=emb_mode,
                         emb_layer=0,
                         max_ncells=max_ncells,
                         forward_batch_size=batch_size,
                         nproc=nproc)

    # Input is the tokenized dataset produced by the Geneformer tokenizer.
    embs = embex.extract_embs(model_directory,
                              f"{d_path}/tokenized_data/{dataset}.dataset",
                              output_directory,
                              f"{dataset}_ctrl_{emb_mode}")

    # Keep a gzip-compressed pickle alongside whatever the extractor wrote.
    embs.to_pickle(f"{d_path}/embeddings/{dataset}_ctrl_{emb_mode}.pkl.gz", compression="gzip")

    return embs

In [12]:
# Cell-level embeddings for the Norman control cells (arguments spelled out for clarity).
norman_ctrl_cell = gf_extract_ctrl_embs(emb_mode='cell', batch_size=64, dataset='norman')

100%|██████████| 186/186 [00:45<00:00,  4.10it/s]


***
#### Perturbation data

In [13]:
## Load the pickled list of Norman perturbations

with open(f'{raw_expr_path}/norman/all_perts.pkl', 'rb') as perts_file:
    norman_all_perts = pkl.load(perts_file)

In [14]:
## Count the perturbations that also appear among the control data's gene symbols
len(set(norman_all_perts) & set(norman_ctrl_adata.var['gene_symbols']))

102

In [15]:
## Find non expressed genes: mask columns whose column sum is zero
non_exp_gene = norman_cl_mask.columns[norman_cl_mask.sum().eq(0)]

In [45]:
def prep_pert_data(all_perts, mask, adata, save_path):
    """Write one AnnData slice per perturbation, with the perturbed gene removed.

    For each perturbation in ``all_perts`` whose ``mask`` column has a non-zero
    sum, the rows of ``adata`` selected by that column are kept, the gene whose
    label equals the perturbation name is dropped, and the result is saved as
    ``<save_path>/<pert>_slice.h5ad``. Files that already exist are left alone.

    Parameters
    ----------
    all_perts : iterable of str
        Perturbation names to process.
    mask : pandas.DataFrame
        One column per perturbation; the column's values are used to index the
        rows of ``adata``.
        NOTE(review): assumes boolean columns aligned with ``adata`` rows -- confirm.
    adata : anndata.AnnData
        Source data. Genes are matched on ``var['gene_symbols']`` when that
        column exists, otherwise on ``var_names``.
    save_path : str or Path
        Directory receiving the slice files; created if missing.

    Raises
    ------
    AssertionError
        If dropping the perturbed gene does not remove exactly one column.
    """
    save_dir = Path(save_path)
    save_dir.mkdir(parents=True, exist_ok=True)

    non_exp_genes = mask.columns[mask.sum() == 0]

    # The gene labels do not depend on the perturbation, so look them up once.
    if 'gene_symbols' in adata.var:
        gene_labels = adata.var['gene_symbols']
    else:
        gene_labels = adata.var_names

    for pert in tqdm(all_perts):

        # Perturbations whose mask column sums to zero are skipped silently.
        if pert in non_exp_genes:
            continue

        slice_file = save_dir / f'{pert}_slice.h5ad'
        if slice_file.is_file():
            print(f'{pert} adata already prepared.')
            continue

        # Only the mask lookup is guarded: a KeyError raised while slicing or
        # writing is a different problem and must not be reported as a missing key.
        try:
            cell_mask = mask[pert].values
        except KeyError as ke:
            print('Key not found in mask:', ke)
            continue

        gene_mask = gene_labels != pert
        sliced_adata = adata[cell_mask, gene_mask]

        # Exactly one gene (the perturbed one) must have been removed.
        assert sliced_adata.shape[1] == adata.shape[1] - 1, (
            f'{pert}: expected to drop exactly one gene, '
            f'dropped {adata.shape[1] - sliced_adata.shape[1]}'
        )

        sliced_adata.write_h5ad(slice_file)

In [234]:
# Folder that receives one `<pert>_slice.h5ad` file per perturbation.
norman_pert_path = '/data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/perts/norman'

prep_pert_data(
    all_perts=norman_all_perts,
    mask=norman_cl_mask,
    adata=norman_ctrl_adata,
    save_path=norman_pert_path,
)

100%|██████████| 102/102 [00:00<00:00, 890.05it/s]

FOXO4 adata already prepared.
HOXA13 adata already prepared.
CSRNP1 adata already prepared.
FOXA1 adata already prepared.
DUSP9 adata already prepared.
SLC4A1 adata already prepared.
MAP2K6 adata already prepared.
KMT2A adata already prepared.
LHX1 adata already prepared.
CKS1B adata already prepared.
NIT1 adata already prepared.
MAP2K3 adata already prepared.
HOXB9 adata already prepared.
ZBTB1 adata already prepared.
JUN adata already prepared.
UBASH3B adata already prepared.
CDKN1A adata already prepared.
NCL adata already prepared.
IKZF3 adata already prepared.
FOXF1 adata already prepared.
FOXA3 adata already prepared.
CNNM4 adata already prepared.
HK2 adata already prepared.
FOXL2 adata already prepared.
RHOXF2 adata already prepared.
EGR1 adata already prepared.
HOXC13 adata already prepared.
ZNF318 adata already prepared.
PRDM1 adata already prepared.
S1PR2 adata already prepared.
CBFA2T3 adata already prepared.
SPI1 adata already prepared.
HNF4A adata already prepared.
FEV ada




In [16]:
def tokenize_pert_data(all_perts, non_exp_genes, input_dir, output_dir):
    """Tokenize every prepared perturbation slice that has not been tokenized yet.

    For each perturbation not listed in ``non_exp_genes``, the file
    ``<input_dir>/<pert>_slice.h5ad`` is tokenized with ``gf_tokenizer`` and
    saved to ``<output_dir>/<pert>.dataset``. Perturbations whose output
    directory already exists, or whose input slice is missing, are skipped
    with a message.

    Parameters
    ----------
    all_perts : iterable of str
        Perturbation names to process.
    non_exp_genes : container of str
        Perturbations to skip silently.
    input_dir : str or Path
        Directory holding the ``<pert>_slice.h5ad`` files.
    output_dir : str or Path
        Directory receiving the ``<pert>.dataset`` folders.
    """
    for pert in tqdm(all_perts):

        if pert in non_exp_genes:
            continue

        in_dir = f'{input_dir}/{pert}_slice.h5ad'
        # Append the extension explicitly: Path.with_suffix would replace
        # everything after the last dot of a name such as "RP5-862P8.2".
        # Outputs written under the old truncated name are not recognised.
        out_path = Path(output_dir) / f'{pert}.dataset'

        if out_path.is_dir():
            print(f'{out_path} already exists {pert} adata already tokenized.')
            continue

        # A slice may be absent (e.g. the perturbation had no mask column);
        # skip it instead of letting the tokenizer abort the whole loop.
        if not Path(in_dir).is_file():
            print(f'{in_dir} not found; skipping {pert}.')
            continue

        gf_tokenizer(in_dir, str(out_path))

In [50]:
# Input slices and the folder that receives the tokenized datasets.
tokenized_dir = '/data/scratch/bty416/scFMs/data/tokenized_data'
norman_pert_path = '/data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/perts/norman'

# Mask columns whose column sum is zero are skipped during tokenization.
non_exp_gene = norman_cl_mask.columns[norman_cl_mask.sum().eq(0)]

tokenize_pert_data(
    all_perts=norman_all_perts,
    non_exp_genes=non_exp_gene,
    input_dir=norman_pert_path,
    output_dir=tokenized_dir,
)

  0%|          | 0/102 [00:00<?, ?it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/FOXO4.dataset already exists FOXO4 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/HOXA13.dataset already exists HOXA13 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/CSRNP1.dataset already exists CSRNP1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/FOXA1.dataset already exists FOXA1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/DUSP9.dataset already exists DUSP9 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/SLC4A1.dataset already exists SLC4A1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/MAP2K6.dataset already exists MAP2K6 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/KMT2A.dataset already exists KMT2A adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/LHX1.dataset already exists LHX1 adata already tokenized.
/data/scratch/bty416/scFMs/data

  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


/data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/perts/norman/IRF1_slice.h5ad has no column attribute 'filter_pass'; tokenizing all cells.
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.48 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 20.79 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.57 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.68 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.31 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:15, 19.85 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.00 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.33 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.16 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 23.93 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.93 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.82 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/IRF1.dataset
/data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/perts/norman/SET_slice.h5ad has no column attribute 'filter_pass'; tokenizing all cells.
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.05 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.32 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.03 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:16, 22.13 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 24.55 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 26.21 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.40 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.52 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 24.01 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 23.33 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 25.11 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.58 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/SET.dataset
/data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/perts/norman/PTPN1_slice.h5ad has no column attribute 'filter_pass'; tokenizing all cells.
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.00 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.58 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.92 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.80 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.32 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 25.98 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.36 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.21 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.98 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 24.43 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.17 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.30 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/PTPN1.dataset
/data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/perts/norman/CEBPA_slice.h5ad has no column attribute 'filter_pass'; tokenizing all cells.
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.71 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:18, 23.21 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.23 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.21 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.71 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.76 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.75 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.49 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.23 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.24 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.30 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 24.08 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/CEBPA.dataset
/data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/perts/norman/GLB1L2_slice.h5ad has no column attribute 'filter_pass'; tokenizing all cells.
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.65 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.45 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.11 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:16, 21.88 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.21 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 25.89 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.14 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.07 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.74 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.86 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.77 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.69 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/GLB1L2.dataset
/data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/perts/norman/DLX2_slice.h5ad has no column attribute 'filter_pass'; tokenizing all cells.
Creating dataset.



Map (num_proc=16):   0%|          | 0/158 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 10/158 [00:01<00:24,  6.02 examples/s][A
Map (num_proc=16):  13%|█▎        | 20/158 [00:03<00:21,  6.44 examples/s][A
Map (num_proc=16):  19%|█▉        | 30/158 [00:04<00:19,  6.59 examples/s][A
Map (num_proc=16):  25%|██▌       | 40/158 [00:05<00:16,  7.17 examples/s][A
Map (num_proc=16):  32%|███▏      | 50/158 [00:06<00:13,  7.98 examples/s][A
Map (num_proc=16):  38%|███▊      | 60/158 [00:08<00:12,  7.56 examples/s][A
Map (num_proc=16):  44%|████▍     | 70/158 [00:09<00:11,  7.44 examples/s][A
Map (num_proc=16):  51%|█████     | 80/158 [00:10<00:09,  8.09 examples/s][A
Map (num_proc=16):  57%|█████▋    | 90/158 [00:11<00:07,  8.59 examples/s][A
Map (num_proc=16):  63%|██████▎   | 100/158 [00:12<00:06,  8.93 examples/s][A
Map (num_proc=16):  70%|██████▉   | 110/158 [00:13<00:05,  9.16 examples/s][A
Map (num_proc=16):  76%|███████▌  | 120/158 [00:14<00:04,  9.42 exampl

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/DLX2.dataset
/data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/perts/norman/KLF1_slice.h5ad has no column attribute 'filter_pass'; tokenizing all cells.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.88 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.42 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.95 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 24.88 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.65 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.64 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.54 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.08 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.39 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.62 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.68 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.97 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/KLF1.dataset
/data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/perts/norman/SLC6A9_slice.h5ad has no column attribute 'filter_pass'; tokenizing all cells.
Creating dataset.



Map (num_proc=16):   0%|          | 0/416 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 26/416 [00:01<00:25, 15.32 examples/s][A
Map (num_proc=16):  12%|█▎        | 52/416 [00:03<00:21, 16.72 examples/s][A
Map (num_proc=16):  19%|█▉        | 78/416 [00:04<00:19, 17.46 examples/s][A
Map (num_proc=16):  25%|██▌       | 104/416 [00:05<00:15, 19.88 examples/s][A
Map (num_proc=16):  31%|███▏      | 130/416 [00:06<00:13, 21.62 examples/s][A
Map (num_proc=16):  38%|███▊      | 156/416 [00:07<00:11, 22.88 examples/s][A
Map (num_proc=16):  44%|████▍     | 182/416 [00:08<00:09, 23.74 examples/s][A
Map (num_proc=16):  50%|█████     | 208/416 [00:09<00:08, 24.27 examples/s][A
Map (num_proc=16):  56%|█████▋    | 234/416 [00:10<00:07, 24.63 examples/s][A
Map (num_proc=16):  62%|██████▎   | 260/416 [00:11<00:06, 24.88 examples/s][A
Map (num_proc=16):  69%|██████▉   | 286/416 [00:12<00:05, 25.23 examples/s][A
Map (num_proc=16):  75%|███████▌  | 312/416 [00:13<00:04, 25.15 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/SLC6A9.dataset
/data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/perts/norman/MAPK1_slice.h5ad has no column attribute 'filter_pass'; tokenizing all cells.
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.07 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.22 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.88 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.25 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.94 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.66 examples/s][A
Map (num_proc=16):  43%|████▎     | 215/500 [00:10<00:14, 19.83 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.68 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.57 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.73 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.93 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 25.65 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/MAPK1.dataset
/data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/perts/norman/ARRDC3_slice.h5ad has no column attribute 'filter_pass'; tokenizing all cells.
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.91 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.50 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.22 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.72 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.97 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.53 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 22.18 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 24.04 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.65 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 26.98 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.94 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 25.78 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/ARRDC3.dataset
/data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/perts/norman/UBASH3A_slice.h5ad has no column attribute 'filter_pass'; tokenizing all cells.
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.94 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.56 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.19 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.79 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.23 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 26.00 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.15 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.07 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.82 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.23 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.60 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.90 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/UBASH3A.dataset
/data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/perts/norman/IER5L_slice.h5ad has no column attribute 'filter_pass'; tokenizing all cells.
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.08 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.35 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.86 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.50 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 24.86 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.42 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.72 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.35 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.77 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.70 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.20 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.39 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/IER5L.dataset
/data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/perts/norman/IGDCC3_slice.h5ad has no column attribute 'filter_pass'; tokenizing all cells.
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.92 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.63 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.24 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.76 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.17 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 26.04 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.17 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.24 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.88 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.37 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.77 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 30.04 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/IGDCC3.dataset
/data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/perts/norman/ETS2_slice.h5ad has no column attribute 'filter_pass'; tokenizing all cells.
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.80 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.76 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 30.26 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:13, 26.84 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 24.79 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:13, 23.39 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:11, 23.31 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:09, 25.25 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:08, 26.66 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 27.71 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 25.61 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:05, 23.23 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/ETS2.dataset
/data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/perts/norman/KIF18B_slice.h5ad has no column attribute 'filter_pass'; tokenizing all cells.
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.80 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.31 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.76 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.77 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.23 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.53 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.71 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 24.71 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 26.14 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 27.34 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.42 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.01 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/KIF18B.dataset
/data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/perts/norman/BCL2L11_slice.h5ad has no column attribute 'filter_pass'; tokenizing all cells.
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.85 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.62 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.16 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.22 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:14, 23.27 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.60 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.43 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 24.46 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 26.13 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 27.41 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 25.32 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.87 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/BCL2L11.dataset
/data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/perts/norman/BPGM_slice.h5ad has no column attribute 'filter_pass'; tokenizing all cells.
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:02<00:29, 16.07 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.11 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.15 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.42 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 24.73 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.42 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.46 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 22.57 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.40 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 26.06 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.21 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.09 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/BPGM.dataset
/data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/perts/norman/OSR2_slice.h5ad has no column attribute 'filter_pass'; tokenizing all cells.
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.89 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.41 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.09 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.22 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 24.53 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 26.24 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.50 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.45 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 23.97 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 23.13 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.28 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.01 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/OSR2.dataset
/data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/perts/norman/SLC38A2_slice.h5ad has no column attribute 'filter_pass'; tokenizing all cells.
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.09 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.70 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.76 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 23.09 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 22.10 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 22.05 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.24 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.92 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.34 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 23.07 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.52 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.27 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/SLC38A2.dataset
/data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/perts/norman/MAP4K5_slice.h5ad has no column attribute 'filter_pass'; tokenizing all cells.
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.84 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.08 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.91 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:16, 22.03 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.28 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.56 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.74 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 22.65 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.16 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.81 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.55 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.58 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/MAP4K5.dataset
/data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/perts/norman/CELF2_slice.h5ad has no column attribute 'filter_pass'; tokenizing all cells.
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.03 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.25 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.82 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.14 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.11 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.48 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.49 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.24 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.81 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.99 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.59 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.45 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/CELF2.dataset
/data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/perts/norman/TMSB4X_slice.h5ad has no column attribute 'filter_pass'; tokenizing all cells.
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.06 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.55 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.71 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.80 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.54 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.67 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.39 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.99 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.38 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.74 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.91 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 30.06 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/TMSB4X.dataset
/data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/perts/norman/TGFBR2_slice.h5ad has no column attribute 'filter_pass'; tokenizing all cells.
Creating dataset.



Map (num_proc=16):   0%|          | 0/78 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 5/78 [00:01<00:23,  3.08 examples/s][A
Map (num_proc=16):  13%|█▎        | 10/78 [00:03<00:20,  3.34 examples/s][A
Map (num_proc=16):  19%|█▉        | 15/78 [00:04<00:16,  3.93 examples/s][A
Map (num_proc=16):  26%|██▌       | 20/78 [00:05<00:13,  4.28 examples/s][A
Map (num_proc=16):  32%|███▏      | 25/78 [00:06<00:11,  4.53 examples/s][A
Map (num_proc=16):  38%|███▊      | 30/78 [00:07<00:10,  4.68 examples/s][A
Map (num_proc=16):  45%|████▍     | 35/78 [00:08<00:08,  4.80 examples/s][A
Map (num_proc=16):  51%|█████▏    | 40/78 [00:09<00:07,  4.86 examples/s][A
Map (num_proc=16):  58%|█████▊    | 45/78 [00:10<00:06,  4.92 examples/s][A
Map (num_proc=16):  64%|██████▍   | 50/78 [00:11<00:05,  4.90 examples/s][A
Map (num_proc=16):  71%|███████   | 55/78 [00:12<00:04,  4.95 examples/s][A
Map (num_proc=16):  77%|███████▋  | 60/78 [00:13<00:03,  4.93 examples/s][A
Map (num

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/TGFBR2.dataset
/data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/perts/norman/ELMSAN1_slice.h5ad has no column attribute 'filter_pass'; tokenizing all cells.
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.07 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.65 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.28 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.06 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 24.52 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 26.15 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.47 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.48 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.90 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 25.84 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 24.33 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 24.10 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/ELMSAN1.dataset
/data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/perts/norman/CLDN6_slice.h5ad has no column attribute 'filter_pass'; tokenizing all cells.
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.00 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.52 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.15 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.41 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.23 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.17 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.83 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.03 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 23.31 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 25.08 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:05, 26.47 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 27.51 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/CLDN6.dataset
/data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/perts/norman/ATL1_slice.h5ad has no column attribute 'filter_pass'; tokenizing all cells.
Creating dataset.



Map (num_proc=16):   0%|          | 0/43 [00:00<?, ? examples/s][A
Map (num_proc=16):   7%|▋         | 3/43 [00:01<00:22,  1.80 examples/s][A
Map (num_proc=16):  14%|█▍        | 6/43 [00:03<00:19,  1.92 examples/s][A
Map (num_proc=16):  21%|██        | 9/43 [00:04<00:16,  2.00 examples/s][A
Map (num_proc=16):  28%|██▊       | 12/43 [00:06<00:15,  2.05 examples/s][A
Map (num_proc=16):  35%|███▍      | 15/43 [00:07<00:13,  2.14 examples/s][A
Map (num_proc=16):  42%|████▏     | 18/43 [00:08<00:11,  2.13 examples/s][A
Map (num_proc=16):  49%|████▉     | 21/43 [00:10<00:10,  2.17 examples/s][A
Map (num_proc=16):  56%|█████▌    | 24/43 [00:11<00:07,  2.38 examples/s][A
Map (num_proc=16):  63%|██████▎   | 27/43 [00:12<00:07,  2.28 examples/s][A
Map (num_proc=16):  70%|██████▉   | 30/43 [00:13<00:05,  2.20 examples/s][A
Map (num_proc=16):  77%|███████▋  | 33/43 [00:15<00:04,  2.16 examples/s][A
Map (num_proc=16):  81%|████████▏ | 35/43 [00:16<00:03,  2.02 examples/s][A
Map (num_p

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/ATL1.dataset
/data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/perts/norman/MIDN_slice.h5ad has no column attribute 'filter_pass'; tokenizing all cells.
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.81 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.11 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.95 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.66 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.62 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.07 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.81 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.20 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.04 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.87 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.39 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.12 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/MIDN.dataset
/data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/perts/norman/COL2A1_slice.h5ad has no column attribute 'filter_pass'; tokenizing all cells.
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.82 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:18, 23.24 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.43 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.00 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 22.68 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.55 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.17 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 27.33 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.08 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.64 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.08 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.39 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/COL2A1.dataset
/data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/perts/norman/TP73_slice.h5ad has no column attribute 'filter_pass'; tokenizing all cells.
Creating dataset.



Map (num_proc=16):   0%|          | 0/271 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 17/271 [00:01<00:25, 10.06 examples/s][A
Map (num_proc=16):  13%|█▎        | 34/271 [00:03<00:21, 10.87 examples/s][A
Map (num_proc=16):  19%|█▉        | 51/271 [00:04<00:18, 12.13 examples/s][A
Map (num_proc=16):  25%|██▌       | 68/271 [00:05<00:15, 13.53 examples/s][A
Map (num_proc=16):  31%|███▏      | 85/271 [00:06<00:12, 14.51 examples/s][A
Map (num_proc=16):  38%|███▊      | 102/271 [00:07<00:11, 15.10 examples/s][A
Map (num_proc=16):  44%|████▍     | 119/271 [00:08<00:09, 15.50 examples/s][A
Map (num_proc=16):  50%|█████     | 136/271 [00:09<00:08, 15.89 examples/s][A
Map (num_proc=16):  56%|█████▋    | 153/271 [00:10<00:08, 14.44 examples/s][A
Map (num_proc=16):  63%|██████▎   | 170/271 [00:12<00:07, 13.46 examples/s][A
Map (num_proc=16):  69%|██████▉   | 187/271 [00:13<00:06, 13.01 examples/s][A
Map (num_proc=16):  75%|███████▌  | 204/271 [00:15<00:05, 12.70 ex

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/TP73.dataset
/data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/perts/norman/CEBPB_slice.h5ad has no column attribute 'filter_pass'; tokenizing all cells.
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.95 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.27 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.76 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.17 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.64 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.62 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.50 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.20 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.44 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 20.87 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 20.95 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.35 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/CEBPB.dataset
/data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/perts/norman/MAP4K3_slice.h5ad has no column attribute 'filter_pass'; tokenizing all cells.
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.80 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.05 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.78 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.87 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.89 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.17 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.04 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.81 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.27 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.47 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.72 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.73 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/MAP4K3.dataset
/data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/perts/norman/ZBTB25_slice.h5ad has no column attribute 'filter_pass'; tokenizing all cells.
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.48 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.54 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.79 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.50 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.42 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.39 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 30.44 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 30.53 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 30.53 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 30.45 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 30.53 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:12<00:04, 30.66 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/ZBTB25.dataset
/data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/perts/norman/KIF2C_slice.h5ad has no column attribute 'filter_pass'; tokenizing all cells.
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.93 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.61 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.24 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.49 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.25 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.04 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.61 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.41 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 26.67 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 27.71 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.45 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.11 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/KIF2C.dataset
/data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/perts/norman/TSC22D1_slice.h5ad has no column attribute 'filter_pass'; tokenizing all cells.
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.96 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.17 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.85 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.68 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.63 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.50 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.64 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.76 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.57 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 26.83 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.78 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.65 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/TSC22D1.dataset
/data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/perts/norman/BAK1_slice.h5ad has no column attribute 'filter_pass'; tokenizing all cells.
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.83 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.32 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.87 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.78 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.97 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.73 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.02 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.58 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 25.13 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 23.61 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.62 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.92 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/BAK1.dataset
/data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/perts/norman/ZC3HAV1_slice.h5ad has no column attribute 'filter_pass'; tokenizing all cells.
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.02 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.46 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.57 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.64 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.22 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.46 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.80 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.53 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.75 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.65 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 28.53 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.94 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/ZC3HAV1.dataset
/data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/perts/norman/ISL2_slice.h5ad has no column attribute 'filter_pass'; tokenizing all cells.
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.66 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.22 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 19.64 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.77 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.24 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.34 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.82 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 24.21 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.07 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 23.22 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.60 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.05 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/ISL2.dataset
/data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/perts/norman/CNN1_slice.h5ad has no column attribute 'filter_pass'; tokenizing all cells.
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.95 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.91 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.57 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.30 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.39 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.40 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:10, 26.24 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:10, 24.50 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:08, 26.11 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 27.34 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 28.18 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 28.62 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/CNN1.dataset
/data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/perts/norman/PTPN9_slice.h5ad has no column attribute 'filter_pass'; tokenizing all cells.
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.83 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 19.87 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.73 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.41 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:13, 24.46 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 26.14 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.31 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.13 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.22 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.18 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 25.54 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 24.15 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/PTPN9.dataset
/data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/perts/norman/FOSB_slice.h5ad has no column attribute 'filter_pass'; tokenizing all cells.
Creating dataset.



Map (num_proc=16):   0%|          | 0/104 [00:00<?, ? examples/s][A
Map (num_proc=16):   7%|▋         | 7/104 [00:01<00:22,  4.31 examples/s][A
Map (num_proc=16):  13%|█▎        | 14/104 [00:03<00:20,  4.47 examples/s][A
Map (num_proc=16):  20%|██        | 21/104 [00:04<00:17,  4.84 examples/s][A
Map (num_proc=16):  27%|██▋       | 28/104 [00:05<00:13,  5.51 examples/s][A
Map (num_proc=16):  34%|███▎      | 35/104 [00:06<00:11,  5.97 examples/s][A
Map (num_proc=16):  40%|████      | 42/104 [00:07<00:09,  6.24 examples/s][A
Map (num_proc=16):  47%|████▋     | 49/104 [00:08<00:08,  6.47 examples/s][A
Map (num_proc=16):  54%|█████▍    | 56/104 [00:09<00:07,  6.60 examples/s][A
Map (num_proc=16):  60%|█████▉    | 62/104 [00:10<00:06,  6.43 examples/s][A
Map (num_proc=16):  65%|██████▌   | 68/104 [00:11<00:05,  6.28 examples/s][A
Map (num_proc=16):  71%|███████   | 74/104 [00:12<00:04,  6.20 examples/s][A
Map (num_proc=16):  77%|███████▋  | 80/104 [00:13<00:03,  6.13 examples/s

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/FOSB.dataset
/data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/perts/norman/TBX3_slice.h5ad has no column attribute 'filter_pass'; tokenizing all cells.
Creating dataset.



Map (num_proc=10):   0%|          | 0/10 [00:00<?, ? examples/s][A
Map (num_proc=10):  10%|█         | 1/10 [00:01<00:14,  1.65s/ examples][A
Map (num_proc=10):  20%|██        | 2/10 [00:03<00:12,  1.54s/ examples][A
Map (num_proc=10):  30%|███       | 3/10 [00:04<00:09,  1.31s/ examples][A
Map (num_proc=10):  40%|████      | 4/10 [00:05<00:07,  1.19s/ examples][A
Map (num_proc=10):  50%|█████     | 5/10 [00:06<00:05,  1.12s/ examples][A
Map (num_proc=10):  60%|██████    | 6/10 [00:07<00:04,  1.08s/ examples][A
Map (num_proc=10):  70%|███████   | 7/10 [00:08<00:03,  1.05s/ examples][A
Map (num_proc=10):  80%|████████  | 8/10 [00:09<00:02,  1.04s/ examples][A
Map (num_proc=10):  90%|█████████ | 9/10 [00:10<00:01,  1.03s/ examples][A
Map (num_proc=10): 100%|██████████| 10/10 [00:11<00:00,  1.13s/ examples][A

Saving the dataset (0/1 shards):   0%|          | 0/10 [00:00<?, ? examples/s][A
Saving the dataset (1/1 shards): 100%|██████████| 10/10 [00:00<00:00, 1352.52 examples/

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/TBX3.dataset
/data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/perts/norman/RUNX1T1_slice.h5ad has no column attribute 'filter_pass'; tokenizing all cells.
Creating dataset.



Map (num_proc=16):   0%|          | 0/27 [00:00<?, ? examples/s][A
Map (num_proc=16):   7%|▋         | 2/27 [00:01<00:20,  1.21 examples/s][A
Map (num_proc=16):  15%|█▍        | 4/27 [00:03<00:17,  1.31 examples/s][A
Map (num_proc=16):  22%|██▏       | 6/27 [00:04<00:15,  1.36 examples/s][A
Map (num_proc=16):  30%|██▉       | 8/27 [00:05<00:13,  1.37 examples/s][A
Map (num_proc=16):  37%|███▋      | 10/27 [00:07<00:12,  1.41 examples/s][A
Map (num_proc=16):  44%|████▍     | 12/27 [00:08<00:09,  1.56 examples/s][A
Map (num_proc=16):  52%|█████▏    | 14/27 [00:09<00:07,  1.69 examples/s][A
Map (num_proc=16):  59%|█████▉    | 16/27 [00:10<00:06,  1.78 examples/s][A
Map (num_proc=16):  67%|██████▋   | 18/27 [00:11<00:04,  1.83 examples/s][A
Map (num_proc=16):  74%|███████▍  | 20/27 [00:12<00:03,  1.89 examples/s][A
Map (num_proc=16):  81%|████████▏ | 22/27 [00:13<00:02,  1.93 examples/s][A
Map (num_proc=16):  85%|████████▌ | 23/27 [00:14<00:02,  1.65 examples/s][A
Map (num_pr

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/RUNX1T1.dataset
/data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/perts/norman/CDKN1C_slice.h5ad has no column attribute 'filter_pass'; tokenizing all cells.
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.99 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.49 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.63 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.62 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.36 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.89 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.02 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.84 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.37 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 23.24 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.72 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 24.63 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/CDKN1C.dataset
/data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/perts/norman/AHR_slice.h5ad has no column attribute 'filter_pass'; tokenizing all cells.
Creating dataset.



Map (num_proc=16):   0%|          | 0/47 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 3/47 [00:01<00:24,  1.83 examples/s][A
Map (num_proc=16):  13%|█▎        | 6/47 [00:03<00:21,  1.93 examples/s][A
Map (num_proc=16):  19%|█▉        | 9/47 [00:04<00:19,  1.97 examples/s][A
Map (num_proc=16):  26%|██▌       | 12/47 [00:06<00:18,  1.86 examples/s][A
Map (num_proc=16):  32%|███▏      | 15/47 [00:07<00:14,  2.14 examples/s][A
Map (num_proc=16):  38%|███▊      | 18/47 [00:08<00:12,  2.36 examples/s][A
Map (num_proc=16):  45%|████▍     | 21/47 [00:09<00:10,  2.53 examples/s][A
Map (num_proc=16):  51%|█████     | 24/47 [00:10<00:08,  2.66 examples/s][A
Map (num_proc=16):  57%|█████▋    | 27/47 [00:11<00:07,  2.72 examples/s][A
Map (num_proc=16):  64%|██████▍   | 30/47 [00:12<00:06,  2.80 examples/s][A
Map (num_proc=16):  70%|███████   | 33/47 [00:13<00:05,  2.51 examples/s][A
Map (num_proc=16):  77%|███████▋  | 36/47 [00:15<00:04,  2.34 examples/s][A
Map (num_p

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/AHR.dataset
/data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/perts/norman/PRTG_slice.h5ad has no column attribute 'filter_pass'; tokenizing all cells.
Creating dataset.



Map (num_proc=16):   0%|          | 0/38 [00:00<?, ? examples/s][A
Map (num_proc=16):   8%|▊         | 3/38 [00:01<00:19,  1.83 examples/s][A
Map (num_proc=16):  16%|█▌        | 6/38 [00:02<00:15,  2.06 examples/s][A
Map (num_proc=16):  24%|██▎       | 9/38 [00:03<00:12,  2.39 examples/s][A
Map (num_proc=16):  32%|███▏      | 12/38 [00:05<00:11,  2.29 examples/s][A
Map (num_proc=16):  39%|███▉      | 15/38 [00:06<00:10,  2.19 examples/s][A
Map (num_proc=16):  47%|████▋     | 18/38 [00:07<00:08,  2.37 examples/s][A
Map (num_proc=16):  53%|█████▎    | 20/38 [00:09<00:08,  2.01 examples/s][A
Map (num_proc=16):  58%|█████▊    | 22/38 [00:10<00:08,  1.79 examples/s][A
Map (num_proc=16):  63%|██████▎   | 24/38 [00:12<00:08,  1.74 examples/s][A
Map (num_proc=16):  68%|██████▊   | 26/38 [00:13<00:07,  1.64 examples/s][A
Map (num_proc=16):  74%|███████▎  | 28/38 [00:14<00:06,  1.57 examples/s][A
Map (num_proc=16):  79%|███████▉  | 30/38 [00:15<00:04,  1.68 examples/s][A
Map (num_p

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/PRTG.dataset
/data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/perts/norman/BCORL1_slice.h5ad has no column attribute 'filter_pass'; tokenizing all cells.
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.00 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.28 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.68 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.34 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.49 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.44 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.30 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.24 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.47 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 22.75 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 24.63 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 26.16 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/BCORL1.dataset
/data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/perts/norman/POU3F2_slice.h5ad has no column attribute 'filter_pass'; tokenizing all cells.
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.60 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.25 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.86 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.94 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.00 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.94 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.69 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.75 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.45 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 26.67 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.67 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.35 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/POU3F2.dataset
/data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/perts/norman/MAML2_slice.h5ad has no column attribute 'filter_pass'; tokenizing all cells.
Creating dataset.



Map (num_proc=16):   0%|          | 0/189 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 12/189 [00:01<00:24,  7.22 examples/s][A
Map (num_proc=16):  13%|█▎        | 24/189 [00:03<00:21,  7.74 examples/s][A
Map (num_proc=16):  19%|█▉        | 36/189 [00:04<00:19,  7.92 examples/s][A
Map (num_proc=16):  25%|██▌       | 48/189 [00:05<00:16,  8.68 examples/s][A
Map (num_proc=16):  32%|███▏      | 60/189 [00:07<00:15,  8.48 examples/s][A
Map (num_proc=16):  38%|███▊      | 72/189 [00:08<00:14,  8.33 examples/s][A
Map (num_proc=16):  44%|████▍     | 84/189 [00:10<00:12,  8.73 examples/s][A
Map (num_proc=16):  51%|█████     | 96/189 [00:10<00:09,  9.57 examples/s][A
Map (num_proc=16):  57%|█████▋    | 108/189 [00:12<00:07, 10.19 examples/s][A
Map (num_proc=16):  63%|██████▎   | 120/189 [00:13<00:06, 10.58 examples/s][A
Map (num_proc=16):  70%|██████▉   | 132/189 [00:14<00:05,  9.73 examples/s][A
Map (num_proc=16):  76%|███████▌  | 144/189 [00:16<00:05,  8.66 examp

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/MAML2.dataset
/data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/perts/norman/COL1A1_slice.h5ad has no column attribute 'filter_pass'; tokenizing all cells.
Creating dataset.



Map (num_proc=16):   0%|          | 0/85 [00:00<?, ? examples/s][A
Map (num_proc=16):   7%|▋         | 6/85 [00:01<00:21,  3.63 examples/s][A
Map (num_proc=16):  14%|█▍        | 12/85 [00:03<00:18,  3.85 examples/s][A
Map (num_proc=16):  21%|██        | 18/85 [00:04<00:17,  3.92 examples/s][A
Map (num_proc=16):  28%|██▊       | 24/85 [00:06<00:15,  3.98 examples/s][A
Map (num_proc=16):  35%|███▌      | 30/85 [00:07<00:12,  4.50 examples/s][A
Map (num_proc=16):  41%|████      | 35/85 [00:08<00:10,  4.60 examples/s][A
Map (num_proc=16):  47%|████▋     | 40/85 [00:09<00:09,  4.68 examples/s][A
Map (num_proc=16):  53%|█████▎    | 45/85 [00:10<00:08,  4.75 examples/s][A
Map (num_proc=16):  59%|█████▉    | 50/85 [00:11<00:07,  4.78 examples/s][A
Map (num_proc=16):  65%|██████▍   | 55/85 [00:12<00:06,  4.80 examples/s][A
Map (num_proc=16):  71%|███████   | 60/85 [00:13<00:05,  4.84 examples/s][A
Map (num_proc=16):  76%|███████▋  | 65/85 [00:14<00:04,  4.34 examples/s][A
Map (num

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/COL1A1.dataset
/data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/perts/norman/ZBTB10_slice.h5ad has no column attribute 'filter_pass'; tokenizing all cells.
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.06 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.59 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.17 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.27 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.09 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.94 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.88 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.92 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.88 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.56 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 24.25 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.24 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/ZBTB10.dataset
/data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/perts/norman/CEBPE_slice.h5ad has no column attribute 'filter_pass'; tokenizing all cells.
Creating dataset.



Map (num_proc=16):   0%|          | 0/442 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 28/442 [00:01<00:25, 16.44 examples/s][A
Map (num_proc=16):  13%|█▎        | 56/442 [00:03<00:21, 17.96 examples/s][A
Map (num_proc=16):  19%|█▉        | 84/442 [00:04<00:19, 18.00 examples/s][A
Map (num_proc=16):  25%|██▌       | 112/442 [00:06<00:18, 18.30 examples/s][A
Map (num_proc=16):  32%|███▏      | 140/442 [00:07<00:16, 18.54 examples/s][A
Map (num_proc=16):  38%|███▊      | 168/442 [00:09<00:14, 19.05 examples/s][A
Map (num_proc=16):  44%|████▍     | 196/442 [00:10<00:11, 21.01 examples/s][A
Map (num_proc=16):  51%|█████     | 224/442 [00:11<00:09, 22.72 examples/s][A
Map (num_proc=16):  57%|█████▋    | 252/442 [00:12<00:07, 24.02 examples/s][A
Map (num_proc=16):  63%|██████▎   | 280/442 [00:13<00:06, 24.99 examples/s][A
Map (num_proc=16):  69%|██████▉   | 307/442 [00:14<00:05, 25.31 examples/s][A
Map (num_proc=16):  76%|███████▌  | 334/442 [00:15<00:04, 25.61 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/CEBPE.dataset
/data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/perts/norman/RREB1_slice.h5ad has no column attribute 'filter_pass'; tokenizing all cells.
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.47 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.44 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.22 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.47 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.65 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.34 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.82 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.24 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.54 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.71 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.90 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.90 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/RREB1.dataset
/data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/perts/norman/LYL1_slice.h5ad has no column attribute 'filter_pass'; tokenizing all cells.
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.67 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.31 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.96 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.48 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.94 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.78 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.19 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.28 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.74 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 23.95 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 25.67 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.95 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/LYL1.dataset
/data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/perts/norman/SGK1_slice.h5ad has no column attribute 'filter_pass'; tokenizing all cells.
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.62 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:18, 23.23 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.32 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.15 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 22.20 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.28 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.10 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 27.36 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.30 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.83 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.38 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.78 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/SGK1.dataset
/data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/perts/norman/HES7_slice.h5ad has no column attribute 'filter_pass'; tokenizing all cells.
Creating dataset.



Map (num_proc=16):   0%|          | 0/26 [00:00<?, ? examples/s][A
Map (num_proc=16):   8%|▊         | 2/26 [00:01<00:19,  1.21 examples/s][A
Map (num_proc=16):  15%|█▌        | 4/26 [00:03<00:16,  1.30 examples/s][A
Map (num_proc=16):  23%|██▎       | 6/26 [00:04<00:13,  1.50 examples/s][A
Map (num_proc=16):  31%|███       | 8/26 [00:05<00:11,  1.53 examples/s][A
Map (num_proc=16):  38%|███▊      | 10/26 [00:06<00:09,  1.68 examples/s][A
Map (num_proc=16):  46%|████▌     | 12/26 [00:07<00:07,  1.77 examples/s][A
Map (num_proc=16):  54%|█████▍    | 14/26 [00:08<00:06,  1.84 examples/s][A
Map (num_proc=16):  62%|██████▏   | 16/26 [00:09<00:05,  1.88 examples/s][A
Map (num_proc=16):  69%|██████▉   | 18/26 [00:10<00:04,  1.91 examples/s][A
Map (num_proc=16):  77%|███████▋  | 20/26 [00:11<00:03,  1.93 examples/s][A
Map (num_proc=16):  81%|████████  | 21/26 [00:12<00:03,  1.66 examples/s][A
Map (num_proc=16):  85%|████████▍ | 22/26 [00:13<00:02,  1.45 examples/s][A
Map (num_pr

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/HES7.dataset
/data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/perts/norman/MEIS1_slice.h5ad has no column attribute 'filter_pass'; tokenizing all cells.
Creating dataset.



Map (num_proc=16):   0%|          | 0/476 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 30/476 [00:01<00:25, 17.82 examples/s][A
Map (num_proc=16):  13%|█▎        | 60/476 [00:03<00:22, 18.81 examples/s][A
Map (num_proc=16):  19%|█▉        | 90/476 [00:04<00:19, 19.41 examples/s][A
Map (num_proc=16):  25%|██▌       | 120/476 [00:06<00:17, 19.85 examples/s][A
Map (num_proc=16):  32%|███▏      | 150/476 [00:07<00:14, 22.49 examples/s][A
Map (num_proc=16):  38%|███▊      | 180/476 [00:08<00:12, 24.41 examples/s][A
Map (num_proc=16):  44%|████▍     | 210/476 [00:09<00:10, 25.86 examples/s][A
Map (num_proc=16):  50%|█████     | 240/476 [00:10<00:08, 26.76 examples/s][A
Map (num_proc=16):  57%|█████▋    | 270/476 [00:11<00:07, 27.39 examples/s][A
Map (num_proc=16):  63%|██████▎   | 300/476 [00:12<00:06, 28.04 examples/s][A
Map (num_proc=16):  69%|██████▉   | 330/476 [00:13<00:05, 28.38 examples/s][A
Map (num_proc=16):  76%|███████▌  | 360/476 [00:14<00:04, 28.63 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/MEIS1.dataset





In [17]:
def embed_pert_data(all_perts, all_perts_embex, non_exp_genes, token_dir, output_dir, 
                    emb_mode: str = 'cell', batch_size: int = 64, dataset: str = 'norman'):
    """
    Extract Geneformer embeddings for every perturbation and pickle the collection.

    Parameters
    ----------
    all_perts : iterable of str
        Perturbation names; each one is expected to have a tokenized dataset
        at ``<token_dir>/<pert>.dataset``.
    all_perts_embex : dict
        Mapping that is filled in place with ``pert -> embeddings`` and returned.
    non_exp_genes : container of str
        Perturbations to skip (membership is tested with ``in``).
    token_dir : str
        Directory that holds the ``<pert>.dataset`` tokenized datasets.
    output_dir : str
        Output directory handed to ``EmbExtractor.extract_embs``.
    emb_mode : str
        Forwarded to ``EmbExtractor`` and used in output file names.
    batch_size : int
        Forwarded to ``EmbExtractor`` as ``forward_batch_size``.
    dataset : str
        Dataset label used in the name of the gzip-pickled result.

    Returns
    -------
    dict
        ``all_perts_embex`` with one entry per processed perturbation.

    Notes
    -----
    The model directory ("../bin/Geneformer") is relative to the working
    directory, and the gzip pickle is written under the notebook-level
    ``d_path`` (``<d_path>/embeddings/``), not under ``output_dir``.
    """
    # The extractor configuration does not depend on the perturbation, so it is
    # built once and reused for every dataset instead of once per iteration.
    embex = EmbExtractor(model_type="Pretrained",
                         num_classes=0,
                         emb_mode=emb_mode,
                         emb_layer=0,
                         max_ncells=20000,
                         forward_batch_size=batch_size,
                         nproc=16)

    for pert in all_perts:

        # Skip perturbations flagged as non-expressed.
        if pert in non_exp_genes:
            continue

        # Input is the tokenized rank value encoding produced by the Geneformer
        # tokenizer. The path is built from `token_dir` (previously this
        # argument was ignored in favour of the global `d_path`).
        all_perts_embex[pert] = embex.extract_embs("../bin/Geneformer",
                                                   f"{token_dir}/{pert}.dataset",
                                                   output_dir,
                                                   f"{pert}_pert_{emb_mode}")

    embeddings_path = f"{d_path}/embeddings/{dataset}_perts_{emb_mode}.pkl.gz"

    with gzip.open(embeddings_path, 'wb') as f:
        pkl.dump(all_perts_embex, f)

    print(f'all_perts_embex saved to {embeddings_path}')

    return all_perts_embex

In [61]:
# Start from an empty mapping; embed_pert_data fills it and returns it.
all_perts_embex = {}
# Mask columns whose column-sum is zero are treated as non-expressed and skipped.
non_exp_genes = norman_cl_mask.columns[norman_cl_mask.sum() == 0]

tokenized_dir = '/data/scratch/bty416/scFMs/data/tokenized_data'
out_dir = f"{d_path}/embeddings/"

# NOTE(review): `norman_all_perts` is not defined in this cell; presumably it
# comes from an earlier cell — confirm it exists on a fresh kernel.
all_perts_embex = embed_pert_data(norman_all_perts, all_perts_embex, non_exp_genes, tokenized_dir, out_dir,dataset='norman')

100%|██████████| 8/8 [00:01<00:00,  4.49it/s]
100%|██████████| 1/1 [00:00<00:00, 32.76it/s]
100%|██████████| 8/8 [00:01<00:00,  4.64it/s]
100%|██████████| 2/2 [00:00<00:00,  6.07it/s]
100%|██████████| 8/8 [00:01<00:00,  4.64it/s]
100%|██████████| 2/2 [00:00<00:00,  6.42it/s]
100%|██████████| 1/1 [00:00<00:00, 38.35it/s]
100%|██████████| 8/8 [00:01<00:00,  5.07it/s]
100%|██████████| 8/8 [00:01<00:00,  5.03it/s]
100%|██████████| 8/8 [00:01<00:00,  4.86it/s]
100%|██████████| 8/8 [00:01<00:00,  5.04it/s]
100%|██████████| 8/8 [00:01<00:00,  4.99it/s]
100%|██████████| 8/8 [00:01<00:00,  5.02it/s]
100%|██████████| 8/8 [00:01<00:00,  4.96it/s]
100%|██████████| 8/8 [00:01<00:00,  4.96it/s]
100%|██████████| 8/8 [00:01<00:00,  4.50it/s]
100%|██████████| 2/2 [00:00<00:00,  5.22it/s]
100%|██████████| 8/8 [00:01<00:00,  4.65it/s]
100%|██████████| 8/8 [00:01<00:00,  4.91it/s]
100%|██████████| 1/1 [00:00<00:00, 27.70it/s]
100%|██████████| 8/8 [00:01<00:00,  5.07it/s]
100%|██████████| 6/6 [00:01<00:00,

In [62]:
# Sanity check: print the shape of every per-perturbation embedding
# (one line of output per entry in all_perts_embex).
for emb in all_perts_embex.values():
    print(emb.shape)

(500, 256)
(3, 256)
(500, 256)
(94, 256)
(500, 256)
(92, 256)
(5, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(111, 256)
(500, 256)
(500, 256)
(8, 256)
(500, 256)
(334, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(292, 256)
(500, 256)
(500, 256)
(500, 256)
(7, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(425, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(158, 256)
(500, 256)
(416, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(78, 256)
(500, 256)
(500, 256)
(43, 256)
(500, 256)
(500, 256)
(271, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(104, 256)
(10, 256)
(27, 256)
(500, 256)
(47, 256)
(38, 256)
(500, 256)
(500, 256)
(189

***
### Replogle datasets
#### Control cells

In [14]:
## Load files for RPE1 data, including mask and check shape
# NOTE(review): the recorded run of this cell ended in KeyboardInterrupt, so the
# variables below may not have been bound by it. gf_preprocess_replogle reads
# the same .h5ad file and applies the same control filter.

with open(f'{raw_expr_path}/replogle_rpe1_mask_df.pkl', 'rb') as f:
    rpe1_mask = pkl.load(f)
    
rpe1_adata = sc.read_h5ad(f'{raw_expr_path}/replogle_rpe1/replogle_rpe1_raw_counts.h5ad')

print(f'Mask shape: {rpe1_mask.shape}, adata shape: {rpe1_adata.shape}')

## Filter for control cells and check shape
# Rows whose obs['perturbation'] equals 'control'; all genes (columns) are kept.
rpe1_ctrl_adata = rpe1_adata[rpe1_adata.obs['perturbation'] == 'control', :]

print(f'adata shape: {rpe1_ctrl_adata.shape}')

KeyboardInterrupt: 

In [None]:
## Load files for K562 data, including mask and check shape
# NOTE(review): this cell has no execution count in the saved notebook.
# gf_preprocess_replogle reads the same .h5ad file and applies the same
# control filter.

with open(f'{raw_expr_path}/replogle_k562_mask_df.pkl', 'rb') as f:
    k562_mask = pkl.load(f)
    
k562_adata = sc.read_h5ad(f'{raw_expr_path}/replogle_k562/replogle_k562_raw_counts.h5ad')

print(f'Mask shape: {k562_mask.shape}, adata shape: {k562_adata.shape}')

## Filter for control cells and check shape
# Rows whose obs['perturbation'] equals 'control'; all genes (columns) are kept.
k562_ctrl_adata = k562_adata[k562_adata.obs['perturbation'] == 'control', :]

print(f'adata shape: {k562_ctrl_adata.shape}')

#### Generate embeddings for control cells

##### Preprocess datasets

In [6]:
def gf_preprocess_replogle(data_directory, output_directory,loom_out_dir):
    """
    Extract the control cells of a Replogle dataset and prepare them for tokenization.

    Parameters
    ----------
    data_directory : str
        Path of the input ``.h5ad`` file (a file path, despite the name).
    output_directory : str
        Path of the ``.h5ad`` file the control subset is written to.
    loom_out_dir : str
        Path of the ``.loom`` file the control subset is written to.

    Returns
    -------
    AnnData
        Independent copy of the control cells with ``var['ensembl_id']`` set and,
        when a count column was found, ``obs['n_counts']``.
    """
    adata = sc.read_h5ad(data_directory)

    print(f'adata shape: {adata.shape}')

    ## Filter for control cells and check shape
    # .copy() turns the subset into a standalone AnnData. The columns added and
    # renamed below would otherwise be written to a view of `adata`.
    ctrl_adata = adata[adata.obs['perturbation'] == 'control', :].copy()

    print(f'ctrl adata shape: {ctrl_adata.shape}')

    # No 'filter_pass' column is added here; the tokenizer then processes all cells.

    if 'ensembl_id' not in ctrl_adata.var:
        ## Rename columns for tokenizer to function correctly
        # NOTE(review): this fallback assumes var_names already hold Ensembl IDs — confirm.
        ctrl_adata.var['ensembl_id'] = ctrl_adata.var_names

    # The tokenizer reads the per-cell total from obs['n_counts'].
    if 'n_counts' not in ctrl_adata.obs:
        if 'ncounts' in ctrl_adata.var and 'ncounts' in ctrl_adata.obs:
            count_col = 'ncounts'
        elif 'read_count' in ctrl_adata.obs:
            count_col = 'read_count'
        elif 'ncounts' in ctrl_adata.obs:
            # Previously missed: 'ncounts' present in obs but not in var.
            count_col = 'ncounts'
        else:
            count_col = None

        if count_col is None:
            print("WARNING: no 'ncounts' or 'read_count' column in .obs; 'n_counts' was not created")
        else:
            ctrl_adata.obs.rename(columns={count_col: 'n_counts'}, inplace=True)

    ctrl_adata.write_h5ad(output_directory)
    ctrl_adata.write_loom(loom_out_dir)

    print(f'saved ctrl adata to {output_directory}')

    return ctrl_adata

In [7]:
# Build the control-cell inputs for both Replogle cell lines. Each call reads the
# raw-count .h5ad, keeps the control cells and writes an .h5ad plus a .loom copy
# under tokenizer_input/.
rpe1_ctrl_adata = gf_preprocess_replogle(f'{raw_expr_path}/replogle_rpe1/replogle_rpe1_raw_counts.h5ad', f'{raw_expr_path}/tokenizer_input/ctrl_replogle_rpe1_raw_counts.h5ad', f'{raw_expr_path}/tokenizer_input/ctrl_replogle_rpe1_raw_counts.loom')
k562_ctrl_adata = gf_preprocess_replogle(f'{raw_expr_path}/replogle_k562/replogle_k562_raw_counts.h5ad', f'{raw_expr_path}/tokenizer_input/ctrl_replogle_k562_raw_counts.h5ad', f'{raw_expr_path}/tokenizer_input/ctrl_replogle_k562_raw_counts.loom')

adata shape: (247914, 8749)
ctrl adata shape: (11485, 8749)
saved ctrl adata to /data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/ctrl_replogle_rpe1_raw_counts.h5ad
adata shape: (310385, 8563)
ctrl adata shape: (10691, 8563)
saved ctrl adata to /data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/ctrl_replogle_k562_raw_counts.h5ad


##### Tokenization

In [None]:
### FROM GENEFORMER BUT CHANGED HOW FILE IS LOADED (works on AnnData obj rather than file)

def gf_tokenize_anndata_replogle(adata, custom_attr_name_dict, genelist_dict, 
                        gene_median_dict, gene_token_dict, chunk_size = 512, target_sum = 10_000):
    """
    Tokenize an in-memory AnnData object into rank value encodings.

    Parameters
    ----------
    adata : AnnData
        Needs ``var['ensembl_id']`` and ``obs['n_counts']``. If ``obs`` has a
        ``filter_pass`` column only cells where it equals 1 are tokenized.
    custom_attr_name_dict : dict or None
        Its keys name the ``obs`` columns collected as per-cell metadata.
    genelist_dict : dict
        Gene ID -> truthy flag; genes missing from it are dropped.
    gene_median_dict : dict
        Gene ID -> value each gene's normalised expression is divided by.
    gene_token_dict : dict
        Gene ID -> token.
    chunk_size : int
        Number of cells processed per iteration.
    target_sum : int
        Per-cell total the counts are scaled to before the median division.

    Returns
    -------
    (list, dict or None)
        One array of tokens per cell, ordered by ``rank_genes``, and the
        collected metadata (``None`` when ``custom_attr_name_dict`` is None).
    """
    # Bound up front so the return value is defined even when no cell is processed.
    if custom_attr_name_dict is not None:
        file_cell_metadata = {
            attr_key: [] for attr_key in custom_attr_name_dict.keys()
        }
    else:
        file_cell_metadata = None

    ensembl_ids = adata.var["ensembl_id"]
    coding_miRNA_loc = np.where(
        [genelist_dict.get(i, False) for i in ensembl_ids]
    )[0]
    # coding_miRNA_loc holds positions, not index labels, so select with .iloc.
    coding_miRNA_ids = ensembl_ids.iloc[coding_miRNA_loc]
    norm_factor_vector = np.array(
        [gene_median_dict[i] for i in coding_miRNA_ids]
    )
    coding_miRNA_tokens = np.array(
        [gene_token_dict[i] for i in coding_miRNA_ids]
    )

    if "filter_pass" in adata.obs:
        filter_pass_loc = np.where([i == 1 for i in adata.obs["filter_pass"]])[0]
    else:
        # There is no file path in this variant, so the message names the object.
        print(
            "AnnData object has no column attribute 'filter_pass'; tokenizing all cells."
        )
        filter_pass_loc = np.arange(adata.shape[0])

    tokenized_cells = []

    for start in range(0, len(filter_pass_loc), chunk_size):
        idx = filter_pass_loc[start : start + chunk_size]
        chunk_obs = adata[idx].obs

        # Scale each cell to target_sum, then divide by the per-gene factor.
        n_counts = chunk_obs["n_counts"].values[:, None]
        X_view = adata[idx, coding_miRNA_loc].X
        X_norm = X_view / n_counts * target_sum / norm_factor_vector
        X_norm = sp.csr_matrix(X_norm)

        # Only the stored (non-zero) entries of each row are ranked.
        tokenized_cells += [
            rank_genes(X_norm[row].data, coding_miRNA_tokens[X_norm[row].indices])
            for row in range(X_norm.shape[0])
        ]

        # add custom attributes for subview to dict
        if file_cell_metadata is not None:
            for k in file_cell_metadata.keys():
                file_cell_metadata[k] += chunk_obs[k].tolist()

    return tokenized_cells, file_cell_metadata

def rank_genes(gene_vector, gene_tokens):
    """
    Order gene tokens from highest to lowest expression value.

    `gene_vector` holds the median-scaled values and `gene_tokens` the token
    at the same position; the tokens are returned in descending value order.
    """
    # Negating the values turns argsort's ascending order into a descending one.
    descending_order = np.argsort(-gene_vector)
    ranked_tokens = gene_tokens[descending_order]
    return ranked_tokens


In [None]:
def gf_tokenizer_replogle(custom_attr_name_dict, adata, genelist_dict, 
                          gene_median_dict, gene_token_dict, 
                          output_path, chunk_size = 512, target_sum = 10_000):
    """
    Tokenize an in-memory AnnData object and save the result as a dataset.

    Parameters
    ----------
    custom_attr_name_dict : dict or None
        Passed to ``TranscriptomeTokenizer`` and to the tokenization step.
    adata : AnnData
        Cells to tokenize.
    genelist_dict, gene_median_dict, gene_token_dict : dict
        Gene lookup tables forwarded to ``gf_tokenize_anndata_replogle``.
    output_path : str
        Location the dataset is saved to with ``save_to_disk``.
    chunk_size : int
        Cells per tokenization chunk.
    target_sum : int
        Per-cell normalisation total.

    Returns
    -------
    tuple
        ``(tokenized_cells, cell_metadata, tokenized_dataset)``.
    """
    tk = TranscriptomeTokenizer(custom_attr_name_dict, nproc=16)

    # Forward the caller's chunk_size / target_sum; previously the literals
    # 512 and 10_000 were passed regardless of the arguments.
    tokenized_cells, cell_metadata = gf_tokenize_anndata_replogle(adata, 
                                                                  custom_attr_name_dict, 
                                                                  genelist_dict, 
                                                                  gene_median_dict, 
                                                                  gene_token_dict,
                                                                  chunk_size = chunk_size, 
                                                                  target_sum = target_sum)

    tokenized_dataset = tk.create_dataset(tokenized_cells, cell_metadata)

    tokenized_dataset.save_to_disk(output_path)

    print(f'Saved tokenized_dataset to {output_path}')

    return tokenized_cells, cell_metadata, tokenized_dataset

In [17]:
# load token dictionary (Ensembl IDs:token)
with open('/data/home/bty416/scFMs/bin/Geneformer/geneformer/token_dictionary.pkl', "rb") as f:
    gene_token_dictionary = pickle.load(f)

# load the gene median dictionary shipped alongside the token dictionary
with open('/data/home/bty416/scFMs/bin/Geneformer/geneformer/gene_median_dictionary.pkl', "rb") as f:
    gene_median_dict = pickle.load(f)
    
# gene keys for full vocabulary
gene_keys = list(gene_token_dictionary.keys())

# protein-coding and miRNA gene list dictionary for selecting .loom rows for tokenization
# Every key of the token dictionary is mapped to True.
genelist_dict = dict(zip(gene_keys, [True] * len(gene_keys)))

In [190]:
# Destination: <d_path>/tokenized_data/rpe1.dataset
rpe1_output_dir = Path(f'{d_path}/tokenized_data/').resolve()
rpe1_prefix = "rpe1"
rpe1_output_path = str((Path(rpe1_output_dir) / rpe1_prefix).with_suffix(".dataset"))

# The token dictionary is bound as `gene_token_dictionary` by the cell that loads
# token_dictionary.pkl; `gene_token_dict` is not defined by any visible cell.
rpe1_tokenized_cells, rpe1_cell_metadata, rpe1_tokenized_dataset = gf_tokenizer_replogle(
    {"cell_line": "cell_type", "tissue_type": "tissue_type"},
    rpe1_ctrl_adata, genelist_dict, gene_median_dict,
    gene_token_dictionary, rpe1_output_path)

  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.


Map (num_proc=16): 100%|██████████| 11485/11485 [00:29<00:00, 383.56 examples/s]
Saving the dataset (1/1 shards): 100%|██████████| 11485/11485 [00:00<00:00, 286611.06 examples/s]

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1.dataset





In [191]:
# Destination: <d_path>/tokenized_data/k562.dataset
k562_output_dir = Path(f'{d_path}/tokenized_data/').resolve()
k562_prefix = "k562"
k562_output_path = str((Path(k562_output_dir) / k562_prefix).with_suffix(".dataset"))

# The token dictionary is bound as `gene_token_dictionary` by the cell that loads
# token_dictionary.pkl; `gene_token_dict` is not defined by any visible cell.
k562_tokenized_cells, k562_cell_metadata, k562_tokenized_dataset = gf_tokenizer_replogle(
    {"cell_line": "cell_type", "tissue_type": "tissue_type"},
    k562_ctrl_adata, genelist_dict, gene_median_dict,
    gene_token_dictionary, k562_output_path)

  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.


Map (num_proc=16): 100%|██████████| 10691/10691 [00:30<00:00, 354.47 examples/s]
Saving the dataset (1/1 shards): 100%|██████████| 10691/10691 [00:00<00:00, 297413.32 examples/s]

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562.dataset





##### Embedding extraction

In [192]:
# Extract embeddings for the RPE1 control cells and compare shapes with the AnnData.
# NOTE(review): gf_extract_ctrl_embs is defined outside this part of the notebook;
# presumably it reads the tokenized 'rpe1' dataset — confirm.
rpe1_ctrl_cell_embex = gf_extract_ctrl_embs(dataset='rpe1')
print(f'AnnData shape: {rpe1_ctrl_adata.shape}, Embedding shape: {rpe1_ctrl_cell_embex.shape}')

100%|██████████| 180/180 [00:39<00:00,  4.52it/s]


In [196]:
# Extract embeddings for the K562 control cells and compare shapes with the AnnData.
# NOTE(review): gf_extract_ctrl_embs is defined outside this part of the notebook;
# presumably it reads the tokenized 'k562' dataset — confirm.
k562_ctrl_cell_embex = gf_extract_ctrl_embs(dataset='k562')
print(f'AnnData shape: {k562_ctrl_adata.shape}, Embedding shape: {k562_ctrl_cell_embex.shape}')

100%|██████████| 168/168 [00:36<00:00,  4.59it/s]


AnnData shape: (10691, 8563), Embedding shape: (10691, 256)


***

#### Perturbation data
##### RPE1


In [3]:
## Load data
with open(f'{raw_expr_path}/replogle_rpe1/all_perts.pkl', 'rb') as f:
    rpe1_all_perts = pkl.load(f)

## Load files for RPE1 data, including mask and check shape
with open(f'{raw_expr_path}/replogle_rpe1_mask_df.pkl', 'rb') as f:
    rpe1_mask = pkl.load(f)

# A bare `len(...)` in the middle of a cell is never displayed, so report the
# sizes explicitly once both objects are loaded.
print(f'Number of perturbations: {len(rpe1_all_perts)}, mask shape: {rpe1_mask.shape}')

In [8]:
## Find perturbations in control data
# Keep only the perturbation names that also appear in the control AnnData's var_names.
rpe1_perts = (set(rpe1_ctrl_adata.var_names).intersection(rpe1_all_perts))
# Last expression of the cell: displays the number of shared names.
len(rpe1_perts)

2106

In [23]:
## Find non expressed genes
# Mask columns whose column-sum is zero.
rpe1_non_exp_gene = rpe1_mask.columns[rpe1_mask.sum() == 0]

In [20]:
# Directory for the per-perturbation tokenizer inputs of the RPE1 dataset.
rpe1_pert_path = '/data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/perts/replogle_rpe1'

# NOTE(review): prep_pert_data is defined outside this part of the notebook; the
# recorded output shows it printing "<gene> adata already prepared." per perturbation.
prep_pert_data(rpe1_perts, rpe1_mask, rpe1_ctrl_adata, rpe1_pert_path)

  9%|▊         | 180/2106 [00:00<00:02, 901.13it/s]

CYREN adata already prepared.
WDFY3 adata already prepared.
KPNA6 adata already prepared.
NUP133 adata already prepared.
MRPL33 adata already prepared.
RPL32 adata already prepared.
SKA1 adata already prepared.
UQCRQ adata already prepared.
MRPS24 adata already prepared.
UTP25 adata already prepared.
RMI1 adata already prepared.
MPG adata already prepared.
INTS13 adata already prepared.
EIF4A3 adata already prepared.
SPC24 adata already prepared.
ANLN adata already prepared.
SAE1 adata already prepared.
PPP1CA adata already prepared.
ERCC2 adata already prepared.
DOLK adata already prepared.
ORC6 adata already prepared.
NOC2L adata already prepared.
PSTK adata already prepared.
NFATC2IP adata already prepared.
H3F3A adata already prepared.
WEE1 adata already prepared.
YY1 adata already prepared.
MBTPS2 adata already prepared.
NDUFB4 adata already prepared.
EP400 adata already prepared.
FDPS adata already prepared.
SDHC adata already prepared.
CCT5 adata already prepared.
NRDE2 adata al

 17%|█▋        | 366/2106 [00:00<00:01, 917.22it/s]

AP2S1 adata already prepared.
PTK2 adata already prepared.
DCTN3 adata already prepared.
EIF3I adata already prepared.
POLR3D adata already prepared.
EXOSC8 adata already prepared.
PKM adata already prepared.
ADRM1 adata already prepared.
TFPT adata already prepared.
C9orf78 adata already prepared.
NOL8 adata already prepared.
MED4 adata already prepared.
YPEL5 adata already prepared.
GEMIN4 adata already prepared.
IMP4 adata already prepared.
PSMB2 adata already prepared.
MRPL10 adata already prepared.
CRCP adata already prepared.
NBEAL1 adata already prepared.
PSMA2 adata already prepared.
PSMD9 adata already prepared.
BRF1 adata already prepared.
CHMP2A adata already prepared.
ANKS6 adata already prepared.
INO80D adata already prepared.
RBM12 adata already prepared.
NISCH adata already prepared.
KIAA1143 adata already prepared.
INTS3 adata already prepared.
SNRPB adata already prepared.
EIF6 adata already prepared.
GNL3 adata already prepared.
PSMG4 adata already prepared.
TUBG1 ada

 26%|██▌       | 551/2106 [00:00<00:01, 919.95it/s]

CPSF6 adata already prepared.
SNAPC2 adata already prepared.
PCNX3 adata already prepared.
SUPT6H adata already prepared.
TNPO3 adata already prepared.
COPB2 adata already prepared.
ROMO1 adata already prepared.
DCTN1 adata already prepared.
GTF2B adata already prepared.
HARS2 adata already prepared.
RIOK1 adata already prepared.
CENPT adata already prepared.
PRRC2A adata already prepared.
MRPL15 adata already prepared.
CCNQ adata already prepared.
ADAM10 adata already prepared.
DCTN5 adata already prepared.
GPN2 adata already prepared.
DSTYK adata already prepared.
TSFM adata already prepared.
NOL11 adata already prepared.
C1orf131 adata already prepared.
NDUFV2 adata already prepared.
METAP1 adata already prepared.
PGAM5 adata already prepared.
NOL7 adata already prepared.
RPL37 adata already prepared.
RFFL adata already prepared.
PPP1R10 adata already prepared.
INTS10 adata already prepared.
NUP160 adata already prepared.
SYMPK adata already prepared.
CENPW adata already prepared.
U

 35%|███▌      | 740/2106 [00:00<00:01, 930.55it/s]

NOB1 adata already prepared.
PIK3R4 adata already prepared.
MRPS14 adata already prepared.
SNTB2 adata already prepared.
MRPL14 adata already prepared.
NUTF2 adata already prepared.
NLE1 adata already prepared.
UBL5 adata already prepared.
JMJD6 adata already prepared.
WBP1 adata already prepared.
TELO2 adata already prepared.
CLSPN adata already prepared.
NOM1 adata already prepared.
NIFK adata already prepared.
TKT adata already prepared.
STT3B adata already prepared.
CHCHD1 adata already prepared.
EIF2S2 adata already prepared.
VPS37A adata already prepared.
USPL1 adata already prepared.
CIAO1 adata already prepared.
MCL1 adata already prepared.
MTREX adata already prepared.
ESF1 adata already prepared.
TRRAP adata already prepared.
TINF2 adata already prepared.
PPP4R2 adata already prepared.
TTK adata already prepared.
ATP5F1A adata already prepared.
LSM12 adata already prepared.
RBM19 adata already prepared.
EIF4A1 adata already prepared.
SEC13 adata already prepared.
NDUFS3 adata

 45%|████▌     | 954/2106 [00:01<00:01, 1000.64it/s]

SMC1A adata already prepared.
VPS33A adata already prepared.
KIF11 adata already prepared.
NSF adata already prepared.
TEFM adata already prepared.
INO80C adata already prepared.
GNL3L adata already prepared.
EXOC7 adata already prepared.
NUBP2 adata already prepared.
COPS4 adata already prepared.
PELO adata already prepared.
MCM10 adata already prepared.
CCNK adata already prepared.
MRPL44 adata already prepared.
MRPL38 adata already prepared.
ZNF143 adata already prepared.
PSMA7 adata already prepared.
ZMAT2 adata already prepared.
LUC7L3 adata already prepared.
UFM1 adata already prepared.
PPIA adata already prepared.
ARF4 adata already prepared.
RBM39 adata already prepared.
WDR46 adata already prepared.
TPR adata already prepared.
ZNF720 adata already prepared.
PCBP2 adata already prepared.
SNRPD1 adata already prepared.
RPL28 adata already prepared.
WDR82 adata already prepared.
ARPC3 adata already prepared.
STIL adata already prepared.
RPS5 adata already prepared.
POP4 adata alr

 55%|█████▌    | 1162/2106 [00:01<00:00, 1006.47it/s]

ZC3H13 adata already prepared.
TTF2 adata already prepared.
PRMT5 adata already prepared.
ACTR10 adata already prepared.
TTI2 adata already prepared.
SDHD adata already prepared.
TRAPPC4 adata already prepared.
PRPF40A adata already prepared.
SNRPD2 adata already prepared.
UBR4 adata already prepared.
RPE adata already prepared.
SF3B1 adata already prepared.
POLR1D adata already prepared.
NDUFA8 adata already prepared.
CHCHD3 adata already prepared.
SNAPC1 adata already prepared.
DROSHA adata already prepared.
OSBP adata already prepared.
GPN1 adata already prepared.
DIMT1 adata already prepared.
ORC4 adata already prepared.
GET3 adata already prepared.
PTMA adata already prepared.
CNOT3 adata already prepared.
VPS29 adata already prepared.
DERL2 adata already prepared.
SON adata already prepared.
GMPPB adata already prepared.
SRSF7 adata already prepared.
COG1 adata already prepared.
NFS1 adata already prepared.
POLD3 adata already prepared.
OIP5 adata already prepared.
SLC25A3 adata 

 60%|█████▉    | 1263/2106 [00:01<00:00, 988.36it/s] 

RFC4 adata already prepared.
HAUS4 adata already prepared.
CSNK2B adata already prepared.
MFN2 adata already prepared.
UFL1 adata already prepared.
WDHD1 adata already prepared.
SHC1 adata already prepared.
CSDE1 adata already prepared.
SRP19 adata already prepared.
SAP30BP adata already prepared.
TOMM20 adata already prepared.
TP53I13 adata already prepared.
METAP2 adata already prepared.
ZNF236 adata already prepared.
SMG5 adata already prepared.
COG2 adata already prepared.
ELP6 adata already prepared.
ABCB7 adata already prepared.
RPL12 adata already prepared.
NELFCD adata already prepared.
GART adata already prepared.
PDAP1 adata already prepared.
BOD1L1 adata already prepared.
KIAA0586 adata already prepared.
NUP62 adata already prepared.
CYFIP1 adata already prepared.
UXS1 adata already prepared.
PHF5A adata already prepared.
LTBP4 adata already prepared.
PPME1 adata already prepared.
AC118549.1 adata already prepared.
TTC4 adata already prepared.
TRAPPC3 adata already prepared.

 69%|██████▉   | 1460/2106 [00:01<00:00, 960.20it/s]

COMTD1 adata already prepared.
CAPZB adata already prepared.
TTC27 adata already prepared.
HSD17B12 adata already prepared.
ALG14 adata already prepared.
ARGLU1 adata already prepared.
CCDC6 adata already prepared.
LARS2 adata already prepared.
RPN1 adata already prepared.
FIP1L1 adata already prepared.
PREB adata already prepared.
RPP30 adata already prepared.
DDX41 adata already prepared.
PSMD11 adata already prepared.
MED27 adata already prepared.
MRPL39 adata already prepared.
TBL3 adata already prepared.
PRPF4B adata already prepared.
TRAPPC5 adata already prepared.
BTAF1 adata already prepared.
GSPT1 adata already prepared.
GLRX5 adata already prepared.
AAR2 adata already prepared.
ZDHHC7 adata already prepared.
SNAPC5 adata already prepared.
SSBP1 adata already prepared.
ESYT2 adata already prepared.
FXN adata already prepared.
HNRNPM adata already prepared.
GINS4 adata already prepared.
SPOUT1 adata already prepared.
UQCRC2 adata already prepared.
SLC25A26 adata already prepare

 78%|███████▊  | 1653/2106 [00:01<00:00, 950.20it/s]

SEC62 adata already prepared.
RNF40 adata already prepared.
BUB1 adata already prepared.
UNC50 adata already prepared.
CWC15 adata already prepared.
SRPRB adata already prepared.
BRK1 adata already prepared.
CUL2 adata already prepared.
BCAR1 adata already prepared.
MED11 adata already prepared.
POLR2C adata already prepared.
TERF2 adata already prepared.
TUFM adata already prepared.
ATP5PO adata already prepared.
SUPT16H adata already prepared.
NUP88 adata already prepared.
BPTF adata already prepared.
FNTA adata already prepared.
BRIP1 adata already prepared.
SP2 adata already prepared.
RAB18 adata already prepared.
PMPCB adata already prepared.
KRR1 adata already prepared.
RPS3 adata already prepared.
RPA2 adata already prepared.
CYCS adata already prepared.
KATNB1 adata already prepared.
XRN2 adata already prepared.
NSA2 adata already prepared.
LSM2 adata already prepared.
UTP3 adata already prepared.
TWSG1 adata already prepared.
DKC1 adata already prepared.
FARS2 adata already pr

 88%|████████▊ | 1844/2106 [00:01<00:00, 938.84it/s]

SUPT20H adata already prepared.
NAA10 adata already prepared.
TRAPPC1 adata already prepared.
NCAPD3 adata already prepared.
EIF1 adata already prepared.
CHORDC1 adata already prepared.
DRG1 adata already prepared.
EEF1A1 adata already prepared.
ZNF706 adata already prepared.
ATP5F1C adata already prepared.
SFPQ adata already prepared.
C5orf30 adata already prepared.
RPP14 adata already prepared.
SF3A3 adata already prepared.
DDX19A adata already prepared.
PSMG2 adata already prepared.
SRSF6 adata already prepared.
COPG1 adata already prepared.
SF3B3 adata already prepared.
SMC5 adata already prepared.
GEMIN5 adata already prepared.
NPM3 adata already prepared.
VHL adata already prepared.
LRR1 adata already prepared.
SNAPC3 adata already prepared.
BTF3L4 adata already prepared.
PNO1 adata already prepared.
ECT2 adata already prepared.
POLR3A adata already prepared.
PSMG3 adata already prepared.
SRP54 adata already prepared.
SCFD1 adata already prepared.
MRPS5 adata already prepared.
BR

100%|██████████| 2106/2106 [00:02<00:00, 951.25it/s]

SEC61B adata already prepared.
GTF2A1 adata already prepared.
RPS6 adata already prepared.
HMGCS1 adata already prepared.
IARS adata already prepared.
RANGAP1 adata already prepared.
POLA2 adata already prepared.
MRPS21 adata already prepared.
PGK1 adata already prepared.
TBPL1 adata already prepared.
ELP3 adata already prepared.
ALYREF adata already prepared.
BUD13 adata already prepared.
DNMT1 adata already prepared.
IFITM2 adata already prepared.
BUD23 adata already prepared.
THOC6 adata already prepared.
DYNLL1 adata already prepared.
PSMB3 adata already prepared.
SNRNP25 adata already prepared.
PIGH adata already prepared.
UBE2I adata already prepared.
RPL23A adata already prepared.
CHMP6 adata already prepared.
CLPB adata already prepared.
PPIH adata already prepared.
RNF4 adata already prepared.
TFIP11 adata already prepared.
MRPS9 adata already prepared.
SAMM50 adata already prepared.
SRCAP adata already prepared.
USP10 adata already prepared.
NDC80 adata already prepared.
NMT1




In [10]:
def tokenize_replogle_pert_data(all_perts, non_exp_genes, input_dir, output_dir,
                                genelist_dict: dict, gene_median_dict: dict, gene_token_dict: dict):
    """Tokenize every perturbation slice that has not been tokenized yet.

    For each perturbation in `all_perts` that is not in `non_exp_genes`, the
    output location `{output_dir}/{pert}.dataset` is checked first. If it is
    already a directory, a message is printed and the perturbation is skipped
    without touching its input file. Otherwise `{input_dir}/{pert}_slice.h5ad`
    is read and passed to `gf_tokenizer_replogle` together with the three
    dictionaries and the output path.

    Parameters
    ----------
    all_perts : iterable
        Perturbation names to process (wrapped in a tqdm progress bar).
    non_exp_genes : container
        Perturbations to skip; membership is tested with `in`.
    input_dir : str or path-like
        Directory holding the `{pert}_slice.h5ad` input files.
    output_dir : str or path-like
        Directory under which the `{pert}.dataset` outputs are looked up and
        whose paths are passed to the tokenizer.
    genelist_dict, gene_median_dict, gene_token_dict : dict
        Forwarded unchanged to `gf_tokenizer_replogle`.

    Returns
    -------
    None
    """
    for pert in tqdm(all_perts):

        if pert in non_exp_genes:
            continue

        # NOTE(review): with_suffix() replaces anything after the last dot, so a
        # name such as 'AC118549.1' maps to 'AC118549.dataset' and could collide
        # with another dotted name. Kept as-is because existing outputs and other
        # cells may rely on this naming -- confirm before changing.
        out_path = (Path(output_dir) / f'{pert}').with_suffix(".dataset")

        # Check for an existing output before reading the slice, so that
        # already-tokenized perturbations cost no disk read.
        if out_path.is_dir():
            print(f'{out_path} already exists {pert} adata already tokenized.')
            continue

        in_dir = f'{input_dir}/{pert}_slice.h5ad'
        adata = sc.read_h5ad(in_dir)

        # First argument is a fixed name mapping; presumably the tokenizer's
        # custom-attribute mapping -- confirm against gf_tokenizer_replogle.
        gf_tokenizer_replogle({"cell_line": "cell_type", "tissue_type": "tissue_type"},
                              adata, genelist_dict, gene_median_dict,
                              gene_token_dict, str(out_path))

In [24]:
# Geneformer token dictionary (Ensembl IDs:token)
with open('/data/home/bty416/scFMs/bin/Geneformer/geneformer/token_dictionary.pkl', "rb") as token_file:
    gene_token_dictionary = pickle.load(token_file)

# Geneformer gene median dictionary
with open('/data/home/bty416/scFMs/bin/Geneformer/geneformer/gene_median_dictionary.pkl', "rb") as median_file:
    gene_median_dict = pickle.load(median_file)

# every key of the token dictionary, i.e. the full vocabulary
gene_keys = list(gene_token_dictionary)

# gene list dictionary (protein-coding and miRNA genes) flagging each vocabulary gene
# as selected for tokenization
genelist_dict = {gene_key: True for gene_key in gene_keys}

In [27]:
## Input slices and tokenized output locations, derived from the notebook's shared
## path configuration (`raw_expr_path`, `d_path`) instead of repeated absolute paths
rpe1_pert_path = f'{raw_expr_path}/tokenizer_input/perts/replogle_rpe1'
rpe1_tokenized_dir = f'{d_path}/tokenized_data/rpe1_pert'

## Mask columns whose column-wise sum is zero; these are passed as the genes to skip
rpe1_non_exp_gene = rpe1_mask.columns[rpe1_mask.sum() == 0]

tokenize_replogle_pert_data(rpe1_perts, rpe1_non_exp_gene, rpe1_pert_path, rpe1_tokenized_dir,
                           genelist_dict, gene_median_dict, gene_token_dictionary)

  0%|          | 2/2106 [00:00<03:14, 10.81it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CYREN.dataset already exists CYREN adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/WDFY3.dataset already exists WDFY3 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/KPNA6.dataset already exists KPNA6 adata already tokenized.


  0%|          | 6/2106 [00:00<03:05, 11.35it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NUP133.dataset already exists NUP133 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MRPL33.dataset already exists MRPL33 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPL32.dataset already exists RPL32 adata already tokenized.


  0%|          | 8/2106 [00:00<02:55, 11.97it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SKA1.dataset already exists SKA1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/UQCRQ.dataset already exists UQCRQ adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MRPS24.dataset already exists MRPS24 adata already tokenized.


  1%|          | 12/2106 [00:01<02:51, 12.24it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/UTP25.dataset already exists UTP25 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RMI1.dataset already exists RMI1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MPG.dataset already exists MPG adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.15 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.66 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.21 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.53 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 22.25 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.59 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.32 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 27.30 examples/s][A
  1%|          | 12/2106 [00:15<02:51, 12.24it/s]1<00:07, 28.31 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 26.06 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.26 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.30 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/INTS13.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/EIF4A3.dataset already exists EIF4A3 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SPC24.dataset already exists SPC24 adata already tokenized.


  1%|          | 18/2106 [00:24<52:31,  1.51s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ANLN.dataset already exists ANLN adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SAE1.dataset already exists SAE1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PPP1CA.dataset already exists PPP1CA adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ERCC2.dataset already exists ERCC2 adata already tokenized.
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 17.18 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.67 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.11 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.19 examples/s][A
  1%|          | 18/2106 [00:35<52:31,  1.51s/it]6<00:13, 26.15 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.36 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.48 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.92 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.61 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.75 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 30.17 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 30.18 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DOLK.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ORC6.dataset already exists ORC6 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NOC2L.dataset already exists NOC2L adata already tokenized.


  1%|          | 25/2106 [00:46<55:33,  1.60s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PSTK.dataset already exists PSTK adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NFATC2IP.dataset already exists NFATC2IP adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/H3F3A.dataset already exists H3F3A adata already tokenized.


  1%|▏         | 27/2106 [00:46<38:27,  1.11s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/WEE1.dataset already exists WEE1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/YY1.dataset already exists YY1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MBTPS2.dataset already exists MBTPS2 adata already tokenized.


  1%|▏         | 31/2106 [00:46<19:29,  1.77it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NDUFB4.dataset already exists NDUFB4 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/EP400.dataset already exists EP400 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/FDPS.dataset already exists FDPS adata already tokenized.


  2%|▏         | 33/2106 [00:46<14:19,  2.41it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SDHC.dataset already exists SDHC adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CCT5.dataset already exists CCT5 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NRDE2.dataset already exists NRDE2 adata already tokenized.


  2%|▏         | 37/2106 [00:47<08:23,  4.11it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPL3.dataset already exists RPL3 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ABCB10.dataset already exists ABCB10 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SRSF2.dataset already exists SRSF2 adata already tokenized.


  2%|▏         | 39/2106 [00:47<06:42,  5.14it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PPA2.dataset already exists PPA2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/BCR.dataset already exists BCR adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MASTL.dataset already exists MASTL adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.05 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.58 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.40 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.43 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.21 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.43 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.29 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.98 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 24.71 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 23.31 examples/s][A
  2%|▏         | 39/2106 [01:05<06:42,  5.14it/s]4<00:06, 22.70 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 22.65 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ANAPC10.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ATP2A2.dataset already exists ATP2A2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DNM2.dataset already exists DNM2 adata already tokenized.


  2%|▏         | 44/2106 [01:10<1:07:58,  1.98s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/HMGN2.dataset already exists HMGN2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NSMCE2.dataset already exists NSMCE2 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.21 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.66 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.24 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.28 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.88 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.23 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.19 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.82 examples/s][A
  2%|▏         | 44/2106 [01:25<1:07:58,  1.98s/it]00:08, 26.22 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 24.52 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.19 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 22.47 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ELOB.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NAA35.dataset already exists NAA35 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/LAMTOR3.dataset already exists LAMTOR3 adata already tokenized.


  2%|▏         | 51/2106 [01:34<1:04:08,  1.87s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CMC4.dataset already exists CMC4 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SLC35B1.dataset already exists SLC35B1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TUBGCP5.dataset already exists TUBGCP5 adata already tokenized.


  3%|▎         | 53/2106 [01:34<44:17,  1.29s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PUF60.dataset already exists PUF60 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ENY2.dataset already exists ENY2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/HNRNPC.dataset already exists HNRNPC adata already tokenized.


  3%|▎         | 57/2106 [01:34<22:19,  1.53it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/OGFOD1.dataset already exists OGFOD1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PGAM1.dataset already exists PGAM1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DCTN4.dataset already exists DCTN4 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.26 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 20.80 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.12 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.45 examples/s][A
  3%|▎         | 57/2106 [01:45<22:19,  1.53it/s]7<00:16, 21.29 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.18 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.10 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.20 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.21 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.28 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 21.26 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.36 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/LAMTOR2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.93 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.53 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.22 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.58 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.17 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.13 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.69 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 24.89 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.79 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.59 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.75 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.83 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MCM3.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ZNF787.dataset already exists ZNF787 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PRKRIP1.dataset already exists PRKRIP1 adata already tokenized.


  3%|▎         | 62/2106 [02:23<2:22:50,  4.19s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/HDAC3.dataset already exists HDAC3 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/IARS2.dataset already exists IARS2 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:23, 19.53 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:17, 25.19 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:14, 27.67 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 29.00 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 29.73 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.03 examples/s][A
  3%|▎         | 62/2106 [02:35<2:22:50,  4.19s/it]00:09, 30.15 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:09, 27.02 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:08, 24.85 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:07, 26.39 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 27.48 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 27.57 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/OXA1L.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/HDAC7.dataset already exists HDAC7 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SART1.dataset already exists SART1 adata already tokenized.


  3%|▎         | 69/2106 [02:44<1:13:35,  2.17s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/POLR3B.dataset already exists POLR3B adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SPAG7.dataset already exists SPAG7 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ENO1.dataset already exists ENO1 adata already tokenized.


  3%|▎         | 71/2106 [02:45<50:08,  1.48s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/UHRF1.dataset already exists UHRF1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TAF10.dataset already exists TAF10 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/POLR3F.dataset already exists POLR3F adata already tokenized.


  4%|▎         | 75/2106 [02:45<24:44,  1.37it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TUBE1.dataset already exists TUBE1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/LAMTOR4.dataset already exists LAMTOR4 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ZBTB17.dataset already exists ZBTB17 adata already tokenized.


  4%|▎         | 77/2106 [02:45<17:54,  1.89it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MRPS23.dataset already exists MRPS23 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SLC7A5.dataset already exists SLC7A5 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SCD.dataset already exists SCD adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.33 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.63 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.08 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.42 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.42 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.63 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 22.06 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 24.10 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.93 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.18 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 28.22 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 29.02 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ISY1.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ACTR6.dataset already exists ACTR6 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PPP1R12A.dataset already exists PPP1R12A adata already tokenized.


  4%|▍         | 84/2106 [03:09<51:32,  1.53s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PSMC4.dataset already exists PSMC4 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/EMC7.dataset already exists EMC7 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SLC7A6OS.dataset already exists SLC7A6OS adata already tokenized.


  4%|▍         | 86/2106 [03:10<35:55,  1.07s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DDX23.dataset already exists DDX23 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/IGBP1.dataset already exists IGBP1 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 17.27 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.61 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.50 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.07 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.90 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.96 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.12 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.27 examples/s][A
  4%|▍         | 86/2106 [03:25<35:55,  1.07s/it]2<00:08, 25.24 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 23.75 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.02 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 24.72 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/FBLIM1.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SRSF10.dataset already exists SRSF10 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TBCC.dataset already exists TBCC adata already tokenized.


  4%|▍         | 90/2106 [03:33<1:28:22,  2.63s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/COG8.dataset already exists COG8 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPL26L1.dataset already exists RPL26L1 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.05 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.23 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.04 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.26 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.22 examples/s][A
  4%|▍         | 90/2106 [03:45<1:28:22,  2.63s/it]00:14, 21.26 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 23.57 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 24.28 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.00 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.21 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 28.22 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.83 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RBM42.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RAE1.dataset already exists RAE1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ZNRD1.dataset already exists ZNRD1 adata already tokenized.


  5%|▍         | 97/2106 [03:57<1:06:28,  1.99s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ZNF131.dataset already exists ZNF131 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DRAP1.dataset already exists DRAP1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CDC16.dataset already exists CDC16 adata already tokenized.


  5%|▍         | 99/2106 [03:57<45:34,  1.36s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CRLS1.dataset already exists CRLS1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TUBGCP4.dataset already exists TUBGCP4 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.94 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.21 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.05 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.08 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.10 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.25 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.92 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.45 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.80 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.53 examples/s][A
  5%|▍         | 99/2106 [04:15<45:34,  1.36s/it]4<00:06, 24.41 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 25.95 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CDK11A.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPA1.dataset already exists RPA1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MRPS2.dataset already exists MRPS2 adata already tokenized.


  5%|▍         | 105/2106 [04:22<1:01:45,  1.85s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MSL1.dataset already exists MSL1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/INTS2.dataset already exists INTS2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/LDB1.dataset already exists LDB1 adata already tokenized.


  5%|▌         | 107/2106 [04:22<41:53,  1.26s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RBBP5.dataset already exists RBBP5 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MCM5.dataset already exists MCM5 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RAB4B.dataset already exists RAB4B adata already tokenized.


  5%|▌         | 111/2106 [04:22<20:45,  1.60it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SHQ1.dataset already exists SHQ1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SARS.dataset already exists SARS adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TFRC.dataset already exists TFRC adata already tokenized.


  5%|▌         | 113/2106 [04:22<15:00,  2.21it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PSMA3.dataset already exists PSMA3 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/UBE2L3.dataset already exists UBE2L3 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CABIN1.dataset already exists CABIN1 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.73 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.11 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.43 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.83 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.59 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:15, 20.64 examples/s][A
  5%|▌         | 113/2106 [04:35<15:00,  2.21it/s]<00:13, 20.98 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.08 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.84 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 23.61 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.67 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 22.09 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SLC2A8.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PDCD6.dataset already exists PDCD6 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/COPS3.dataset already exists COPS3 adata already tokenized.


  6%|▌         | 120/2106 [04:47<51:26,  1.55s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TOP2A.dataset already exists TOP2A adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MRPL17.dataset already exists MRPL17 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ARFRP1.dataset already exists ARFRP1 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]


/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NUFIP1.dataset already exists NUFIP1 adata already tokenized.


  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.91 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.28 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.86 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.07 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 24.65 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.09 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.24 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.00 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.82 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.25 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.51 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.73 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPS12.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NUBP1.dataset already exists NUBP1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/GEMIN8.dataset already exists GEMIN8 adata already tokenized.


  6%|▌         | 127/2106 [05:09<53:48,  1.63s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SFSWAP.dataset already exists SFSWAP adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/GPS2.dataset already exists GPS2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NCOA4.dataset already exists NCOA4 adata already tokenized.


  6%|▌         | 129/2106 [05:09<37:23,  1.13s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/EIF2B3.dataset already exists EIF2B3 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPS15.dataset already exists RPS15 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/GRPEL1.dataset already exists GRPEL1 adata already tokenized.


  6%|▋         | 133/2106 [05:09<19:06,  1.72it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PRPF3.dataset already exists PRPF3 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CPNE1.dataset already exists CPNE1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/UQCRB.dataset already exists UQCRB adata already tokenized.


  6%|▋         | 135/2106 [05:10<14:03,  2.34it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NCAPH.dataset already exists NCAPH adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PRXL2A.dataset already exists PRXL2A adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NOL10.dataset already exists NOL10 adata already tokenized.


  7%|▋         | 139/2106 [05:10<08:07,  4.03it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPLP0.dataset already exists RPLP0 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PARN.dataset already exists PARN adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SMU1.dataset already exists SMU1 adata already tokenized.


  7%|▋         | 141/2106 [05:10<06:26,  5.08it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PRELID3B.dataset already exists PRELID3B adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RRP1.dataset already exists RRP1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ALG2.dataset already exists ALG2 adata already tokenized.


  7%|▋         | 145/2106 [05:10<04:25,  7.37it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DPH2.dataset already exists DPH2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NDC1.dataset already exists NDC1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/GLB1.dataset already exists GLB1 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CEBPZ.dataset already exists CEBPZ adata already tokenized.
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.77 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:18, 23.15 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.38 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 21.89 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.53 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.77 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.03 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.65 examples/s][A
  7%|▋         | 145/2106 [05:25<04:25,  7.37it/s]<00:08, 24.26 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 23.18 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.44 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.79 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PDRG1.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MSRB1.dataset already exists MSRB1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TPT1.dataset already exists TPT1 adata already tokenized.


  7%|▋         | 152/2106 [05:34<45:37,  1.40s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SH3KBP1.dataset already exists SH3KBP1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MRPS25.dataset already exists MRPS25 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/LAMTOR1.dataset already exists LAMTOR1 adata already tokenized.


  7%|▋         | 154/2106 [05:35<31:59,  1.02it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ORC3.dataset already exists ORC3 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/FKBP9.dataset already exists FKBP9 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/FYN.dataset already exists FYN adata already tokenized.


  7%|▋         | 156/2106 [05:35<22:49,  1.42it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/KRT10.dataset already exists KRT10 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.86 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:18, 23.05 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 22.45 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.19 examples/s][A
  7%|▋         | 156/2106 [05:45<22:49,  1.42it/s]<00:15, 21.84 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.05 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 23.18 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.46 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 21.95 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.71 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.52 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.48 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PMF1.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/YARS2.dataset already exists YARS2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MCM4.dataset already exists MCM4 adata already tokenized.


  8%|▊         | 160/2106 [06:00<1:24:51,  2.62s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CUL1.dataset already exists CUL1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DONSON.dataset already exists DONSON adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.91 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:23, 18.83 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 20.06 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.07 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.51 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.33 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.44 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 24.44 examples/s][A
  8%|▊         | 160/2106 [06:15<1:24:51,  2.62s/it]0:09, 23.31 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.52 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.34 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 24.13 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MED1.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SNF8.dataset already exists SNF8 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ZNHIT6.dataset already exists ZNHIT6 adata already tokenized.


  8%|▊         | 167/2106 [06:24<1:04:27,  1.99s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DYNC1I2.dataset already exists DYNC1I2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RACK1.dataset already exists RACK1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/WDR75.dataset already exists WDR75 adata already tokenized.


  8%|▊         | 169/2106 [06:24<44:12,  1.37s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPS4X.dataset already exists RPS4X adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/HUWE1.dataset already exists HUWE1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SEC63.dataset already exists SEC63 adata already tokenized.


  8%|▊         | 171/2106 [06:24<30:56,  1.04it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CHCHD2.dataset already exists CHCHD2 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.95 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 20.84 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.66 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.86 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.82 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.52 examples/s][A
  8%|▊         | 171/2106 [06:35<30:56,  1.04it/s]<00:09, 29.00 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.43 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:08, 26.13 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 23.48 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 25.14 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 26.42 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CNOT2.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NVL.dataset already exists NVL adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DR1.dataset already exists DR1 adata already tokenized.


  8%|▊         | 177/2106 [06:46<51:02,  1.59s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CDCA5.dataset already exists CDCA5 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/POLR1A.dataset already exists POLR1A adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPS9.dataset already exists RPS9 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/BRCA2.dataset already exists BRCA2 adata already tokenized.
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.93 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.28 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.96 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.61 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.89 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.82 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.11 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 24.75 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.50 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.92 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.21 examples/s][A
  8%|▊         | 177/2106 [07:05<51:02,  1.59s/it]<00:05, 21.84 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SSU72.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ZNF574.dataset already exists ZNF574 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/EIF2B4.dataset already exists EIF2B4 adata already tokenized.


  9%|▊         | 184/2106 [07:11<58:22,  1.82s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DSN1.dataset already exists DSN1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ADNP2.dataset already exists ADNP2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/FAM133B.dataset already exists FAM133B adata already tokenized.


  9%|▉         | 186/2106 [07:11<40:16,  1.26s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MED31.dataset already exists MED31 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SETD2.dataset already exists SETD2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/AP2S1.dataset already exists AP2S1 adata already tokenized.


  9%|▉         | 188/2106 [07:11<28:18,  1.13it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PTK2.dataset already exists PTK2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DCTN3.dataset already exists DCTN3 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 16.93 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.43 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.27 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.95 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.98 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.98 examples/s][A
  9%|▉         | 188/2106 [07:26<28:18,  1.13it/s]<00:13, 21.24 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.23 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.26 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:15<00:08, 21.10 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 21.04 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:18<00:05, 21.15 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/EIF3I.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/POLR3D.dataset already exists POLR3D adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.80 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.33 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.82 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.17 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.11 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.03 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 22.46 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.03 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 21.74 examples/s][A
  9%|▉         | 191/2106 [07:56<2:05:25,  3.93s/it]0:08, 21.28 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 21.11 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.03 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/EXOSC8.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PKM.dataset already exists PKM adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ADRM1.dataset already exists ADRM1 adata already tokenized.


  9%|▉         | 197/2106 [08:04<1:21:46,  2.57s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TFPT.dataset already exists TFPT adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/C9orf78.dataset already exists C9orf78 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NOL8.dataset already exists NOL8 adata already tokenized.


  9%|▉         | 199/2106 [08:04<54:50,  1.73s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MED4.dataset already exists MED4 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/YPEL5.dataset already exists YPEL5 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/GEMIN4.dataset already exists GEMIN4 adata already tokenized.


 10%|▉         | 203/2106 [08:04<26:32,  1.20it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/IMP4.dataset already exists IMP4 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PSMB2.dataset already exists PSMB2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MRPL10.dataset already exists MRPL10 adata already tokenized.


 10%|▉         | 205/2106 [08:05<18:57,  1.67it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CRCP.dataset already exists CRCP adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NBEAL1.dataset already exists NBEAL1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PSMA2.dataset already exists PSMA2 adata already tokenized.


 10%|▉         | 209/2106 [08:05<10:21,  3.05it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PSMD9.dataset already exists PSMD9 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/BRF1.dataset already exists BRF1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CHMP2A.dataset already exists CHMP2A adata already tokenized.


 10%|█         | 211/2106 [08:05<07:58,  3.96it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ANKS6.dataset already exists ANKS6 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/INO80D.dataset already exists INO80D adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RBM12.dataset already exists RBM12 adata already tokenized.


 10%|█         | 215/2106 [08:05<05:11,  6.06it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NISCH.dataset already exists NISCH adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/KIAA1143.dataset already exists KIAA1143 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/INTS3.dataset already exists INTS3 adata already tokenized.


 10%|█         | 217/2106 [08:06<04:22,  7.19it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SNRPB.dataset already exists SNRPB adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/EIF6.dataset already exists EIF6 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/GNL3.dataset already exists GNL3 adata already tokenized.


 10%|█         | 221/2106 [08:06<03:22,  9.32it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PSMG4.dataset already exists PSMG4 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TUBG1.dataset already exists TUBG1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/EIF3A.dataset already exists EIF3A adata already tokenized.


 11%|█         | 223/2106 [08:06<03:04, 10.20it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PALB2.dataset already exists PALB2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PDCD11.dataset already exists PDCD11 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SPCS3.dataset already exists SPCS3 adata already tokenized.


 11%|█         | 227/2106 [08:06<02:49, 11.09it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/WDR25.dataset already exists WDR25 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SMARCC1.dataset already exists SMARCC1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/BCAS2.dataset already exists BCAS2 adata already tokenized.


 11%|█         | 229/2106 [08:06<02:41, 11.61it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/LONP1.dataset already exists LONP1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPL15.dataset already exists RPL15 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/THOC7.dataset already exists THOC7 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.76 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 20.88 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.55 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.77 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.83 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.54 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 29.12 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.35 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.66 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.93 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 26.78 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:05, 24.77 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PPP1R8.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SF1.dataset already exists SF1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/EIF5.dataset already exists EIF5 adata already tokenized.


 11%|█         | 236/2106 [08:29<39:54,  1.28s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/AP2A1.dataset already exists AP2A1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RBM25.dataset already exists RBM25 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CDC26.dataset already exists CDC26 adata already tokenized.


 11%|█▏        | 240/2106 [08:29<19:53,  1.56it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/POLR2E.dataset already exists POLR2E adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ARCN1.dataset already exists ARCN1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NDUFAF5.dataset already exists NDUFAF5 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PSMB6.dataset already exists PSMB6 adata already tokenized.


 11%|█▏        | 242/2106 [08:29<14:27,  2.15it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RUVBL1.dataset already exists RUVBL1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MRTO4.dataset already exists MRTO4 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NUP205.dataset already exists NUP205 adata already tokenized.


 12%|█▏        | 246/2106 [08:29<08:10,  3.80it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MRPL49.dataset already exists MRPL49 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ZWINT.dataset already exists ZWINT adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/BRCA1.dataset already exists BRCA1 adata already tokenized.


 12%|█▏        | 248/2106 [08:30<06:21,  4.87it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/UBR5.dataset already exists UBR5 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RNF8.dataset already exists RNF8 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SNW1.dataset already exists SNW1 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.67 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.42 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.58 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.93 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.10 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.51 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.39 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 24.21 examples/s][A
 12%|█▏        | 248/2106 [08:46<06:21,  4.87it/s]<00:09, 23.26 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 22.21 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.54 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.40 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/FCF1.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PSME1.dataset already exists PSME1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/OGT.dataset already exists OGT adata already tokenized.


 12%|█▏        | 255/2106 [08:56<47:36,  1.54s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPSA.dataset already exists RPSA adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CCT8.dataset already exists CCT8 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CEP97.dataset already exists CEP97 adata already tokenized.


 12%|█▏        | 257/2106 [08:56<33:15,  1.08s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MRPL11.dataset already exists MRPL11 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NOP2.dataset already exists NOP2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/IRF2BP2.dataset already exists IRF2BP2 adata already tokenized.


 12%|█▏        | 261/2106 [08:56<17:06,  1.80it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPL13A.dataset already exists RPL13A adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/THOC5.dataset already exists THOC5 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/METTL17.dataset already exists METTL17 adata already tokenized.


 12%|█▏        | 263/2106 [08:56<12:35,  2.44it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/UBA52.dataset already exists UBA52 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MRPS27.dataset already exists MRPS27 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PCBP1.dataset already exists PCBP1 adata already tokenized.


 13%|█▎        | 267/2106 [08:57<07:26,  4.12it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SNRPD3.dataset already exists SNRPD3 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TCP1.dataset already exists TCP1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DENR.dataset already exists DENR adata already tokenized.


 13%|█▎        | 269/2106 [08:57<05:55,  5.17it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NUP93.dataset already exists NUP93 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPL31.dataset already exists RPL31 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PNPT1.dataset already exists PNPT1 adata already tokenized.


 13%|█▎        | 273/2106 [08:57<04:06,  7.43it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MMS19.dataset already exists MMS19 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/LAS1L.dataset already exists LAS1L adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPL7A.dataset already exists RPL7A adata already tokenized.


 13%|█▎        | 275/2106 [08:57<03:36,  8.45it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DDX54.dataset already exists DDX54 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SUPV3L1.dataset already exists SUPV3L1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/UBC.dataset already exists UBC adata already tokenized.


 13%|█▎        | 277/2106 [08:58<03:16,  9.32it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ELOF1.dataset already exists ELOF1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NOP10.dataset already exists NOP10 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.89 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.36 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.17 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.44 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.54 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 24.09 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.84 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:11, 22.15 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:10, 21.59 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 21.77 examples/s][A
 13%|█▎        | 277/2106 [09:16<03:16,  9.32it/s]<00:06, 23.69 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 25.18 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SRP68.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPRD1B.dataset already exists RPRD1B adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/EIF2B5.dataset already exists EIF2B5 adata already tokenized.


 13%|█▎        | 284/2106 [09:21<41:17,  1.36s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/UBE2M.dataset already exists UBE2M adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/KRI1.dataset already exists KRI1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/IPO9.dataset already exists IPO9 adata already tokenized.


 14%|█▎        | 286/2106 [09:21<28:58,  1.05it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/KIF20A.dataset already exists KIF20A adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/LETM1.dataset already exists LETM1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TADA2A.dataset already exists TADA2A adata already tokenized.


 14%|█▍        | 290/2106 [09:21<15:01,  2.01it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MIS18A.dataset already exists MIS18A adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/IWS1.dataset already exists IWS1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/UBA2.dataset already exists UBA2 adata already tokenized.


 14%|█▍        | 292/2106 [09:22<11:11,  2.70it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CNOT11.dataset already exists CNOT11 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PPAN.dataset already exists PPAN adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/C7orf50.dataset already exists C7orf50 adata already tokenized.


 14%|█▍        | 294/2106 [09:22<08:33,  3.53it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MMGT1.dataset already exists MMGT1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MCM2.dataset already exists MCM2 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.74 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.13 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.64 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 20.65 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.66 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:15, 20.63 examples/s][A
 14%|█▍        | 294/2106 [09:36<08:33,  3.53it/s]<00:13, 20.86 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.00 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 22.39 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.80 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.87 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 25.33 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TARS2.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TUBB2A.dataset already exists TUBB2A adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.75 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.30 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.82 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.84 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.66 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.85 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.90 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.09 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.39 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 23.53 examples/s][A
 14%|█▍        | 297/2106 [10:06<1:39:06,  3.29s/it]0:06, 25.14 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 26.50 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SUGP1.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPL4.dataset already exists RPL4 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NAA30.dataset already exists NAA30 adata already tokenized.


 14%|█▍        | 303/2106 [10:12<1:11:42,  2.39s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ZW10.dataset already exists ZW10 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CARS.dataset already exists CARS adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ANAPC11.dataset already exists ANAPC11 adata already tokenized.


 14%|█▍        | 305/2106 [10:12<48:12,  1.61s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/FASN.dataset already exists FASN adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/HJURP.dataset already exists HJURP adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/COX5B.dataset already exists COX5B adata already tokenized.


 15%|█▍        | 309/2106 [10:13<23:28,  1.28it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NUDT15.dataset already exists NUDT15 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TTI1.dataset already exists TTI1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CDK1.dataset already exists CDK1 adata already tokenized.


 15%|█▍        | 311/2106 [10:13<16:53,  1.77it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ATF4.dataset already exists ATF4 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TNRC6A.dataset already exists TNRC6A adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PPWD1.dataset already exists PPWD1 adata already tokenized.


 15%|█▍        | 315/2106 [10:13<09:20,  3.19it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/EIF1AX.dataset already exists EIF1AX adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RAC3.dataset already exists RAC3 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DNTTIP2.dataset already exists DNTTIP2 adata already tokenized.


 15%|█▌        | 317/2106 [10:13<07:13,  4.13it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/AHCTF1.dataset already exists AHCTF1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MRPS35.dataset already exists MRPS35 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MCRS1.dataset already exists MCRS1 adata already tokenized.


 15%|█▌        | 319/2106 [10:14<05:46,  5.16it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/HNRNPH1.dataset already exists HNRNPH1 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.08 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.58 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.33 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.41 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.34 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 24.25 examples/s][A
 15%|█▌        | 319/2106 [10:26<05:46,  5.16it/s]<00:12, 23.01 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:11, 22.45 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 21.82 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 21.66 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.99 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.66 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/UBA1.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DTL.dataset already exists DTL adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NEDD8.dataset already exists NEDD8 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CNN2.dataset already exists CNN2 adata already tokenized.
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.99 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.07 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.05 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.21 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 22.93 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.96 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.77 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.43 examples/s][A
 15%|█▌        | 322/2106 [10:56<1:33:01,  3.13s/it]0:10, 21.37 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.15 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 20.84 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 20.87 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TMEM258.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/EIF3E.dataset already exists EIF3E adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.49 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.10 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.34 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 20.51 examples/s][A
 15%|█▌        | 325/2106 [11:16<2:32:06,  5.12s/it]0:16, 20.60 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:15, 20.64 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.37 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:12, 20.37 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.51 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:15<00:08, 20.72 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 20.56 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:18<00:05, 20.71 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RRP7A.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PPP2CB.dataset already exists PPP2CB adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PPP2CA.dataset already exists PPP2CA adata already tokenized.


 16%|█▌        | 331/2106 [11:33<1:26:57,  2.94s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SPC25.dataset already exists SPC25 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/POP7.dataset already exists POP7 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RRN3.dataset already exists RRN3 adata already tokenized.


 16%|█▌        | 333/2106 [11:33<58:04,  1.97s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ABCF1.dataset already exists ABCF1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NAT10.dataset already exists NAT10 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MRPL16.dataset already exists MRPL16 adata already tokenized.


 16%|█▌        | 337/2106 [11:33<27:47,  1.06it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DHFR.dataset already exists DHFR adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/POLL.dataset already exists POLL adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PIP4K2C.dataset already exists PIP4K2C adata already tokenized.


 16%|█▌        | 339/2106 [11:33<19:47,  1.49it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/LSG1.dataset already exists LSG1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PPIL2.dataset already exists PPIL2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RRP15.dataset already exists RRP15 adata already tokenized.


 16%|█▋        | 343/2106 [11:33<10:38,  2.76it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PSMC3.dataset already exists PSMC3 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NOP14.dataset already exists NOP14 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ZNF207.dataset already exists ZNF207 adata already tokenized.


 16%|█▋        | 345/2106 [11:34<08:06,  3.62it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/KTI12.dataset already exists KTI12 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SNRNP200.dataset already exists SNRNP200 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MRGBP.dataset already exists MRGBP adata already tokenized.


 16%|█▋        | 347/2106 [11:34<06:17,  4.65it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MED14.dataset already exists MED14 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.77 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.17 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 23.96 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 24.95 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.64 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.60 examples/s][A
 16%|█▋        | 347/2106 [11:46<06:17,  4.65it/s]<00:11, 24.88 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.49 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 22.70 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 24.57 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 25.89 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.09 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PLK4.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SEC16A.dataset already exists SEC16A adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CNIH4.dataset already exists CNIH4 adata already tokenized.


 17%|█▋        | 353/2106 [11:56<41:15,  1.41s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/VPS28.dataset already exists VPS28 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SKP1.dataset already exists SKP1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPUSD3.dataset already exists RPUSD3 adata already tokenized.


 17%|█▋        | 355/2106 [11:56<28:24,  1.03it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPL9.dataset already exists RPL9 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/COTL1.dataset already exists COTL1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PPIL1.dataset already exists PPIL1 adata already tokenized.


 17%|█▋        | 359/2106 [11:57<14:27,  2.01it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/HARS.dataset already exists HARS adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MRPS34.dataset already exists MRPS34 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MRPL2.dataset already exists MRPL2 adata already tokenized.


 17%|█▋        | 361/2106 [11:57<10:44,  2.71it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SMARCB1.dataset already exists SMARCB1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MPHOSPH10.dataset already exists MPHOSPH10 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPS20.dataset already exists RPS20 adata already tokenized.


 17%|█▋        | 365/2106 [11:57<06:25,  4.52it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/GTF3A.dataset already exists GTF3A adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PDHA1.dataset already exists PDHA1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CTNNBL1.dataset already exists CTNNBL1 adata already tokenized.


 17%|█▋        | 367/2106 [11:57<05:08,  5.64it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPS24.dataset already exists RPS24 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPS3A.dataset already exists RPS3A adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PSMD3.dataset already exists PSMD3 adata already tokenized.


 18%|█▊        | 371/2106 [11:58<03:43,  7.75it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PHF10.dataset already exists PHF10 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MRPL34.dataset already exists MRPL34 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NCBP2.dataset already exists NCBP2 adata already tokenized.


 18%|█▊        | 373/2106 [11:58<03:20,  8.63it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ARID5B.dataset already exists ARID5B adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CPSF6.dataset already exists CPSF6 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SNAPC2.dataset already exists SNAPC2 adata already tokenized.


 18%|█▊        | 377/2106 [11:58<02:47, 10.33it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PCNX3.dataset already exists PCNX3 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SUPT6H.dataset already exists SUPT6H adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TNPO3.dataset already exists TNPO3 adata already tokenized.


 18%|█▊        | 379/2106 [11:58<02:34, 11.15it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/COPB2.dataset already exists COPB2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ROMO1.dataset already exists ROMO1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DCTN1.dataset already exists DCTN1 adata already tokenized.


 18%|█▊        | 383/2106 [11:59<02:27, 11.67it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/GTF2B.dataset already exists GTF2B adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/HARS2.dataset already exists HARS2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RIOK1.dataset already exists RIOK1 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CENPT.dataset already exists CENPT adata already tokenized.
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.01 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 21.13 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.90 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.95 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.98 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 25.11 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.60 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 22.71 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.21 examples/s][A
 18%|█▊        | 383/2106 [12:16<02:27, 11.67it/s]<00:08, 21.78 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 20.61 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:06, 20.67 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PRRC2A.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MRPL15.dataset already exists MRPL15 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CCNQ.dataset already exists CCNQ adata already tokenized.


 19%|█▊        | 390/2106 [12:25<42:40,  1.49s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ADAM10.dataset already exists ADAM10 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DCTN5.dataset already exists DCTN5 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/GPN2.dataset already exists GPN2 adata already tokenized.


 19%|█▊        | 392/2106 [12:25<29:53,  1.05s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DSTYK.dataset already exists DSTYK adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TSFM.dataset already exists TSFM adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NOL11.dataset already exists NOL11 adata already tokenized.


 19%|█▉        | 396/2106 [12:25<15:29,  1.84it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/C1orf131.dataset already exists C1orf131 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NDUFV2.dataset already exists NDUFV2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/METAP1.dataset already exists METAP1 adata already tokenized.


 19%|█▉        | 398/2106 [12:25<11:26,  2.49it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PGAM5.dataset already exists PGAM5 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NOL7.dataset already exists NOL7 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPL37.dataset already exists RPL37 adata already tokenized.


 19%|█▉        | 402/2106 [12:25<06:41,  4.25it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RFFL.dataset already exists RFFL adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PPP1R10.dataset already exists PPP1R10 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/INTS10.dataset already exists INTS10 adata already tokenized.


 19%|█▉        | 404/2106 [12:26<05:24,  5.24it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NUP160.dataset already exists NUP160 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SYMPK.dataset already exists SYMPK adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CENPW.dataset already exists CENPW adata already tokenized.


 19%|█▉        | 408/2106 [12:26<03:49,  7.41it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/USP37.dataset already exists USP37 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MLST8.dataset already exists MLST8 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MRPL13.dataset already exists MRPL13 adata already tokenized.


 19%|█▉        | 410/2106 [12:26<03:22,  8.37it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/VPS25.dataset already exists VPS25 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MIOS.dataset already exists MIOS adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NSUN4.dataset already exists NSUN4 adata already tokenized.


 20%|█▉        | 414/2106 [12:27<02:53,  9.77it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPL7L1.dataset already exists RPL7L1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MKRN1.dataset already exists MKRN1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NDUFA3.dataset already exists NDUFA3 adata already tokenized.


 20%|█▉        | 416/2106 [12:27<02:44, 10.28it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PPP1R11.dataset already exists PPP1R11 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PWP1.dataset already exists PWP1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NUP43.dataset already exists NUP43 adata already tokenized.


 20%|█▉        | 420/2106 [12:27<02:30, 11.22it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DNAJA3.dataset already exists DNAJA3 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/EEFSEC.dataset already exists EEFSEC adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/HNRNPDL.dataset already exists HNRNPDL adata already tokenized.


 20%|██        | 422/2106 [12:27<02:27, 11.38it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/KANSL2.dataset already exists KANSL2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/HSCB.dataset already exists HSCB adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/EXOSC10.dataset already exists EXOSC10 adata already tokenized.


 20%|██        | 426/2106 [12:28<02:24, 11.63it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CRKL.dataset already exists CRKL adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CENPC.dataset already exists CENPC adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DAP3.dataset already exists DAP3 adata already tokenized.


 20%|██        | 428/2106 [12:28<02:23, 11.70it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/GRB2.dataset already exists GRB2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CCND3.dataset already exists CCND3 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPS7.dataset already exists RPS7 adata already tokenized.


 20%|██        | 430/2106 [12:28<02:23, 11.70it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPLP2.dataset already exists RPLP2 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.07 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.43 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.98 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.31 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.01 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.91 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.09 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.33 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.16 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 26.54 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.52 examples/s][A
 20%|██        | 430/2106 [12:46<02:23, 11.70it/s]<00:04, 28.33 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NOL9.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/UPF1.dataset already exists UPF1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SDAD1.dataset already exists SDAD1 adata already tokenized.


 21%|██        | 436/2106 [12:51<39:13,  1.41s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/GGPS1.dataset already exists GGPS1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SLC35G2.dataset already exists SLC35G2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/WARS.dataset already exists WARS adata already tokenized.


 21%|██        | 438/2106 [12:51<27:01,  1.03it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PTBP1.dataset already exists PTBP1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RABGGTA.dataset already exists RABGGTA adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DNAJC17.dataset already exists DNAJC17 adata already tokenized.


 21%|██        | 440/2106 [12:51<19:07,  1.45it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TIMM13.dataset already exists TIMM13 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 16.79 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.28 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 20.17 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.97 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.33 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.22 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.64 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.68 examples/s][A
 21%|██        | 440/2106 [13:06<19:07,  1.45it/s]<00:07, 28.29 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.73 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.94 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.24 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SRSF3.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPS28.dataset already exists RPS28 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.59 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.12 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.72 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.39 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.63 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.64 examples/s][A
 21%|██        | 442/2106 [13:26<1:46:38,  3.85s/it]0:10, 26.90 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.84 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.57 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.33 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.28 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 22.53 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NSRP1.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RNF14.dataset already exists RNF14 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/GTF2E2.dataset already exists GTF2E2 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TOPBP1.dataset already exists TOPBP1 adata already tokenized.
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.97 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.26 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.42 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.30 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.06 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.10 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.05 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.55 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.08 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 26.22 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 24.47 examples/s][A
 21%|██        | 445/2106 [13:56<2:21:36,  5.12s/it]0:05, 23.22 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/VPS54.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ZNHIT3.dataset already exists ZNHIT3 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.79 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.47 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.47 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.29 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.16 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.31 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.11 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.78 examples/s][A
 21%|██▏       | 448/2106 [14:16<2:45:51,  6.00s/it]0:07, 28.96 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.48 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 26.60 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:05, 24.48 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NSL1.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/GET1.dataset already exists GET1 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.49 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.34 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.65 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 29.93 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 29.91 examples/s][A
 21%|██▏       | 450/2106 [14:36<3:29:08,  7.58s/it]0:11, 26.10 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:11, 23.87 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:11, 21.90 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:10, 21.54 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 21.39 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:07, 21.49 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.44 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/FBXO5.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.90 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.24 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.93 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.07 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.89 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.02 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.08 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 20.85 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.96 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:15<00:08, 20.89 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 20.83 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:18<00:05, 20.91 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/GIT2.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CLP1.dataset already exists CLP1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PCM1.dataset already exists PCM1 adata already tokenized.


 22%|██▏       | 457/2106 [15:15<1:46:57,  3.89s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/THG1L.dataset already exists THG1L adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SRP9.dataset already exists SRP9 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SUPT5H.dataset already exists SUPT5H adata already tokenized.


 22%|██▏       | 459/2106 [15:15<1:09:33,  2.53s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/BRIX1.dataset already exists BRIX1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ARL6IP6.dataset already exists ARL6IP6 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NDUFA2.dataset already exists NDUFA2 adata already tokenized.


 22%|██▏       | 461/2106 [15:16<46:49,  1.71s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CLTC.dataset already exists CLTC adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.74 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.26 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.91 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.89 examples/s][A
 22%|██▏       | 461/2106 [15:27<46:49,  1.71s/it]<00:15, 22.62 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.96 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 22.15 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 24.07 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.65 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 26.81 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.65 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.26 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NPLOC4.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/POLR2M.dataset already exists POLR2M adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TIPIN.dataset already exists TIPIN adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.80 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.35 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.21 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.20 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.91 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.61 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.11 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.30 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 23.53 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.65 examples/s][A
 22%|██▏       | 463/2106 [15:57<2:03:22,  4.51s/it]0:07, 22.11 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.62 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TSEN2.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TTC1.dataset already exists TTC1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/GPS1.dataset already exists GPS1 adata already tokenized.


 22%|██▏       | 470/2106 [16:02<1:04:28,  2.36s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ATP6V1G1.dataset already exists ATP6V1G1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DDX3X.dataset already exists DDX3X adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SPRTN.dataset already exists SPRTN adata already tokenized.


 22%|██▏       | 472/2106 [16:02<43:35,  1.60s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPL18.dataset already exists RPL18 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DDX21.dataset already exists DDX21 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.96 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.27 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.33 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.32 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.03 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.11 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.88 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.63 examples/s][A
 22%|██▏       | 472/2106 [16:17<43:35,  1.60s/it]<00:07, 28.93 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.24 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.50 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 26.25 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ZBTB4.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RABGGTB.dataset already exists RABGGTB adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CFL1.dataset already exists CFL1 adata already tokenized.


 23%|██▎       | 478/2106 [16:26<50:51,  1.87s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CTU2.dataset already exists CTU2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MTHFD1.dataset already exists MTHFD1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/FAU.dataset already exists FAU adata already tokenized.


 23%|██▎       | 480/2106 [16:26<34:32,  1.27s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DDX51.dataset already exists DDX51 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPL41.dataset already exists RPL41 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CAST.dataset already exists CAST adata already tokenized.


 23%|██▎       | 484/2106 [16:27<17:07,  1.58it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SUMO2.dataset already exists SUMO2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ATP6AP2.dataset already exists ATP6AP2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NEMF.dataset already exists NEMF adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.93 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.47 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.07 examples/s][A
 23%|██▎       | 484/2106 [16:37<17:07,  1.58it/s]<00:17, 21.16 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.09 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.98 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.00 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.04 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.06 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.34 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.44 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 25.21 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NAF1.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MPHOSPH6.dataset already exists MPHOSPH6 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MEPCE.dataset already exists MEPCE adata already tokenized.


 23%|██▎       | 490/2106 [16:52<45:43,  1.70s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PPP1R2.dataset already exists PPP1R2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ATP5MF.dataset already exists ATP5MF adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/EPB41L2.dataset already exists EPB41L2 adata already tokenized.


 23%|██▎       | 492/2106 [16:52<31:13,  1.16s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NUMA1.dataset already exists NUMA1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/HMGA1.dataset already exists HMGA1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SMC2.dataset already exists SMC2 adata already tokenized.


 24%|██▎       | 496/2106 [16:52<15:40,  1.71it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SRSF11.dataset already exists SRSF11 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/U2SURP.dataset already exists U2SURP adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/POLR2H.dataset already exists POLR2H adata already tokenized.


 24%|██▎       | 498/2106 [16:52<11:30,  2.33it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NUB1.dataset already exists NUB1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DHODH.dataset already exists DHODH adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPS23.dataset already exists RPS23 adata already tokenized.


 24%|██▍       | 502/2106 [16:53<06:38,  4.03it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MRPS6.dataset already exists MRPS6 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ANAPC4.dataset already exists ANAPC4 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RCC1.dataset already exists RCC1 adata already tokenized.


 24%|██▍       | 504/2106 [16:53<05:17,  5.05it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPS17.dataset already exists RPS17 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TLN1.dataset already exists TLN1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ALG13.dataset already exists ALG13 adata already tokenized.


 24%|██▍       | 508/2106 [16:53<03:41,  7.23it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DHX30.dataset already exists DHX30 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/WDR12.dataset already exists WDR12 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/POLR3C.dataset already exists POLR3C adata already tokenized.


 24%|██▍       | 510/2106 [16:53<03:11,  8.35it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CDC40.dataset already exists CDC40 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CSE1L.dataset already exists CSE1L adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MED29.dataset already exists MED29 adata already tokenized.


 24%|██▍       | 512/2106 [16:53<02:50,  9.33it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/POLR3K.dataset already exists POLR3K adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NPEPPS.dataset already exists NPEPPS adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.96 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.17 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.85 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 20.60 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.16 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.13 examples/s][A
 24%|██▍       | 512/2106 [17:07<02:50,  9.33it/s]<00:11, 23.90 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 22.83 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.21 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.69 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.40 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.45 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/COA5.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/HNRNPL.dataset already exists HNRNPL adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.57 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.20 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.82 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.08 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.92 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.95 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.15 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.22 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.00 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 26.30 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.35 examples/s][A
 24%|██▍       | 515/2106 [17:37<1:21:26,  3.07s/it]0:04, 26.68 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/HMBOX1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.90 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.32 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.07 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.96 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.85 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.18 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.09 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.72 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.19 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.43 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 26.73 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:05, 24.73 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/HERC1.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DDX27.dataset already exists DDX27 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.60 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.31 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.84 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 20.10 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.37 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.90 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 23.22 examples/s][A
 25%|██▍       | 518/2106 [18:17<3:26:28,  7.80s/it]0:09, 24.88 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.29 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.18 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.76 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.19 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PAK1IP1.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MAX.dataset already exists MAX adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/GSK3B.dataset already exists GSK3B adata already tokenized.


 25%|██▍       | 524/2106 [18:27<1:22:49,  3.14s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SNRPE.dataset already exists SNRPE adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MRPL41.dataset already exists MRPL41 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PRORP.dataset already exists PRORP adata already tokenized.


 25%|██▍       | 526/2106 [18:27<54:09,  2.06s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/UBE2N.dataset already exists UBE2N adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SAP18.dataset already exists SAP18 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MEN1.dataset already exists MEN1 adata already tokenized.


 25%|██▌       | 530/2106 [18:27<25:28,  1.03it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DOHH.dataset already exists DOHH adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PPIE.dataset already exists PPIE adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NDUFB10.dataset already exists NDUFB10 adata already tokenized.


 25%|██▌       | 532/2106 [18:28<18:02,  1.45it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/IMPA2.dataset already exists IMPA2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CPSF1.dataset already exists CPSF1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PSME2.dataset already exists PSME2 adata already tokenized.


 25%|██▌       | 536/2106 [18:28<09:40,  2.70it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CCT6A.dataset already exists CCT6A adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RRM2.dataset already exists RRM2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MRPL50.dataset already exists MRPL50 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PHB2.dataset already exists PHB2 adata already tokenized.
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.83 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.27 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.86 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.09 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.61 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.39 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.17 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.09 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.32 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.83 examples/s][A
 25%|██▌       | 536/2106 [18:47<09:40,  2.70it/s]<00:06, 22.96 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 24.70 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TAF2.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DDX17.dataset already exists DDX17 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RARS2.dataset already exists RARS2 adata already tokenized.


 26%|██▌       | 542/2106 [18:52<49:35,  1.90s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/OPA1.dataset already exists OPA1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NCBP1.dataset already exists NCBP1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RBBP8.dataset already exists RBBP8 adata already tokenized.


 26%|██▌       | 546/2106 [18:52<25:13,  1.03it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DHX36.dataset already exists DHX36 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/C1QBP.dataset already exists C1QBP adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TAF7.dataset already exists TAF7 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:17, 26.06 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.91 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.86 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.33 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.33 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.19 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 30.20 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 30.07 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:08, 26.70 examples/s][A
 26%|██▌       | 546/2106 [19:07<25:13,  1.03it/s]<00:07, 24.63 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:06, 23.59 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:05, 22.58 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NDUFAF7.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/COX11.dataset already exists COX11 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/GOLT1B.dataset already exists GOLT1B adata already tokenized.


 26%|██▌       | 552/2106 [19:15<43:25,  1.68s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MIS12.dataset already exists MIS12 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NDUFB8.dataset already exists NDUFB8 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/INPPL1.dataset already exists INPPL1 adata already tokenized.


 26%|██▋       | 554/2106 [19:16<29:48,  1.15s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PNKP.dataset already exists PNKP adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/HSPH1.dataset already exists HSPH1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/QARS.dataset already exists QARS adata already tokenized.


 26%|██▋       | 556/2106 [19:16<20:53,  1.24it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPL18A.dataset already exists RPL18A adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/INTS4.dataset already exists INTS4 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 16.97 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.31 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.36 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.90 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.42 examples/s][A
 26%|██▋       | 556/2106 [19:27<20:53,  1.24it/s]<00:12, 25.29 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.69 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.63 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.22 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.77 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.17 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.41 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPS15A.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NOB1.dataset already exists NOB1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PIK3R4.dataset already exists PIK3R4 adata already tokenized.


 27%|██▋       | 562/2106 [19:40<53:11,  2.07s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MRPS14.dataset already exists MRPS14 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SNTB2.dataset already exists SNTB2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MRPL14.dataset already exists MRPL14 adata already tokenized.


 27%|██▋       | 566/2106 [19:40<26:50,  1.05s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NUTF2.dataset already exists NUTF2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NLE1.dataset already exists NLE1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/UBL5.dataset already exists UBL5 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/JMJD6.dataset already exists JMJD6 adata already tokenized.
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.58 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.22 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.77 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.27 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.73 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.59 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.63 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.98 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.80 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 25.23 examples/s][A
 27%|██▋       | 566/2106 [19:57<26:50,  1.05s/it]<00:06, 23.76 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 22.69 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/WBP1.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TELO2.dataset already exists TELO2 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.82 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.34 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.08 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.19 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.97 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.19 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.00 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.84 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.11 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 26.28 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 24.24 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.29 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CLSPN.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NOM1.dataset already exists NOM1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NIFK.dataset already exists NIFK adata already tokenized.


 27%|██▋       | 576/2106 [20:27<57:54,  2.27s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TKT.dataset already exists TKT adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/STT3B.dataset already exists STT3B adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CHCHD1.dataset already exists CHCHD1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/EIF2S2.dataset already exists EIF2S2 adata already tokenized.


 27%|██▋       | 578/2106 [20:28<41:02,  1.61s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/VPS37A.dataset already exists VPS37A adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/USPL1.dataset already exists USPL1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CIAO1.dataset already exists CIAO1 adata already tokenized.


 28%|██▊       | 582/2106 [20:28<21:00,  1.21it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MCL1.dataset already exists MCL1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MTREX.dataset already exists MTREX adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ESF1.dataset already exists ESF1 adata already tokenized.


 28%|██▊       | 584/2106 [20:28<15:14,  1.66it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TRRAP.dataset already exists TRRAP adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TINF2.dataset already exists TINF2 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.84 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.26 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.90 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.73 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.36 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.16 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.15 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.36 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.21 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.15 examples/s][A
 28%|██▊       | 584/2106 [20:47<15:14,  1.66it/s]<00:07, 21.10 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.25 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PPP4R2.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TTK.dataset already exists TTK adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ATP5F1A.dataset already exists ATP5F1A adata already tokenized.


 28%|██▊       | 589/2106 [20:54<58:42,  2.32s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/LSM12.dataset already exists LSM12 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RBM19.dataset already exists RBM19 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/EIF4A1.dataset already exists EIF4A1 adata already tokenized.


 28%|██▊       | 593/2106 [20:55<28:41,  1.14s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SEC13.dataset already exists SEC13 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NDUFS3.dataset already exists NDUFS3 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RBM33.dataset already exists RBM33 adata already tokenized.


 28%|██▊       | 595/2106 [20:55<20:28,  1.23it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MICOS10.dataset already exists MICOS10 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SSBP3.dataset already exists SSBP3 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PRKRA.dataset already exists PRKRA adata already tokenized.


 28%|██▊       | 599/2106 [20:55<10:52,  2.31it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ATP6V1D.dataset already exists ATP6V1D adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MRPS10.dataset already exists MRPS10 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CEBPG.dataset already exists CEBPG adata already tokenized.


 29%|██▊       | 601/2106 [20:55<08:10,  3.07it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPL11.dataset already exists RPL11 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/IMPDH2.dataset already exists IMPDH2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NPAT.dataset already exists NPAT adata already tokenized.


 29%|██▊       | 603/2106 [20:55<06:19,  3.96it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DHX15.dataset already exists DHX15 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.19 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 19.87 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 20.09 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 23.20 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.14 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 26.26 examples/s][A
 29%|██▊       | 603/2106 [21:07<06:19,  3.96it/s]<00:10, 27.41 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.11 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.39 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.73 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 25.94 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 24.07 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TAF8.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SMG6.dataset already exists SMG6 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.77 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.17 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.01 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.99 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.81 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.04 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.39 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.26 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.48 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 21.25 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.32 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 24.90 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/KDM5C.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RBX1.dataset already exists RBX1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MAD2L2.dataset already exists MAD2L2 adata already tokenized.


 29%|██▉       | 610/2106 [21:43<1:20:31,  3.23s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/EIF2S3.dataset already exists EIF2S3 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SCNM1.dataset already exists SCNM1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ANKLE2.dataset already exists ANKLE2 adata already tokenized.


 29%|██▉       | 614/2106 [21:44<39:20,  1.58s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DDX49.dataset already exists DDX49 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CHERP.dataset already exists CHERP adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NUP153.dataset already exists NUP153 adata already tokenized.


 29%|██▉       | 616/2106 [21:44<27:54,  1.12s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MCM7.dataset already exists MCM7 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TIMM44.dataset already exists TIMM44 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PSMD8.dataset already exists PSMD8 adata already tokenized.


 29%|██▉       | 620/2106 [21:44<14:32,  1.70it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SARNP.dataset already exists SARNP adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPL36AL.dataset already exists RPL36AL adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/UBE2D3.dataset already exists UBE2D3 adata already tokenized.


 30%|██▉       | 622/2106 [21:44<10:45,  2.30it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DHX33.dataset already exists DHX33 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SERF2.dataset already exists SERF2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ABCE1.dataset already exists ABCE1 adata already tokenized.


 30%|██▉       | 626/2106 [21:44<06:16,  3.93it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RTTN.dataset already exists RTTN adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MRPS22.dataset already exists MRPS22 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SAMD4B.dataset already exists SAMD4B adata already tokenized.


 30%|██▉       | 628/2106 [21:45<04:58,  4.95it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/BMS1.dataset already exists BMS1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MOK.dataset already exists MOK adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DLD.dataset already exists DLD adata already tokenized.


 30%|███       | 632/2106 [21:45<03:24,  7.23it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PFDN5.dataset already exists PFDN5 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/FAM50A.dataset already exists FAM50A adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NDUFA4.dataset already exists NDUFA4 adata already tokenized.


 30%|███       | 634/2106 [21:45<02:58,  8.25it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SNAPC4.dataset already exists SNAPC4 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MRPL28.dataset already exists MRPL28 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.81 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.88 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.77 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.10 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.37 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.31 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 30.22 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 30.26 examples/s][A
 30%|███       | 634/2106 [21:57<02:58,  8.25it/s]<00:07, 30.31 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 30.35 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 30.37 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:12<00:04, 30.38 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RRP9.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MED16.dataset already exists MED16 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DMAP1.dataset already exists DMAP1 adata already tokenized.


 30%|███       | 639/2106 [22:06<42:22,  1.73s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/BAG6.dataset already exists BAG6 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ATP6V0C.dataset already exists ATP6V0C adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ARIH1.dataset already exists ARIH1 adata already tokenized.


 31%|███       | 643/2106 [22:06<20:53,  1.17it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/E4F1.dataset already exists E4F1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPS29.dataset already exists RPS29 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TAF1B.dataset already exists TAF1B adata already tokenized.


 31%|███       | 645/2106 [22:07<15:02,  1.62it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ADSL.dataset already exists ADSL adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/XRCC1.dataset already exists XRCC1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NUDCD3.dataset already exists NUDCD3 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.64 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.50 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.97 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.68 examples/s][A
 31%|███       | 645/2106 [22:17<15:02,  1.62it/s]<00:15, 21.50 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.71 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 23.05 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.40 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.06 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 24.02 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 25.68 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 26.95 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NOP16.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/HMGB3.dataset already exists HMGB3 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DDX52.dataset already exists DDX52 adata already tokenized.


 31%|███       | 651/2106 [22:30<47:49,  1.97s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SLBP.dataset already exists SLBP adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ELP2.dataset already exists ELP2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CCDC115.dataset already exists CCDC115 adata already tokenized.


 31%|███       | 655/2106 [22:31<24:21,  1.01s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/UXT.dataset already exists UXT adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/COG4.dataset already exists COG4 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ICE1.dataset already exists ICE1 adata already tokenized.


 31%|███       | 657/2106 [22:31<17:35,  1.37it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/WDR44.dataset already exists WDR44 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/GTF3C3.dataset already exists GTF3C3 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/POLA1.dataset already exists POLA1 adata already tokenized.


 31%|███▏      | 661/2106 [22:31<09:34,  2.52it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/GAK.dataset already exists GAK adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/HAUS1.dataset already exists HAUS1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ATP5F1B.dataset already exists ATP5F1B adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.59 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.34 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.84 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.93 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.74 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.80 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.09 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.45 examples/s][A
 31%|███▏      | 661/2106 [22:47<09:34,  2.52it/s]<00:08, 24.16 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 23.13 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.30 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.89 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SKA3.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/WDR5.dataset already exists WDR5 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MRPS12.dataset already exists MRPS12 adata already tokenized.


 32%|███▏      | 667/2106 [22:57<39:25,  1.64s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NFRKB.dataset already exists NFRKB adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PSMG1.dataset already exists PSMG1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NDUFAB1.dataset already exists NDUFAB1 adata already tokenized.


 32%|███▏      | 669/2106 [22:57<27:03,  1.13s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DDOST.dataset already exists DDOST adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TIMM8A.dataset already exists TIMM8A adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CCNL1.dataset already exists CCNL1 adata already tokenized.


 32%|███▏      | 673/2106 [22:57<13:38,  1.75it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/EXOSC6.dataset already exists EXOSC6 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/C7orf26.dataset already exists C7orf26 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MT2A.dataset already exists MT2A adata already tokenized.


 32%|███▏      | 675/2106 [22:57<10:01,  2.38it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DYNLRB1.dataset already exists DYNLRB1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/WBP11.dataset already exists WBP11 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SRA1.dataset already exists SRA1 adata already tokenized.


 32%|███▏      | 679/2106 [22:58<05:48,  4.10it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/LSM4.dataset already exists LSM4 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CENPJ.dataset already exists CENPJ adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/YRDC.dataset already exists YRDC adata already tokenized.


 32%|███▏      | 681/2106 [22:58<04:37,  5.14it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RAMAC.dataset already exists RAMAC adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RRM1.dataset already exists RRM1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ZNF24.dataset already exists ZNF24 adata already tokenized.


 33%|███▎      | 685/2106 [22:58<03:11,  7.42it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TRMT112.dataset already exists TRMT112 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPL39.dataset already exists RPL39 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/XPO1.dataset already exists XPO1 adata already tokenized.


 33%|███▎      | 687/2106 [22:58<02:49,  8.40it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/LTV1.dataset already exists LTV1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/GTF2H2.dataset already exists GTF2H2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SS18L2.dataset already exists SS18L2 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.64 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.05 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.97 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.99 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.67 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.95 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.78 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.49 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.85 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.15 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.27 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.45 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/GCLC.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MRPS31.dataset already exists MRPS31 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.79 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.24 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.92 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.39 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.49 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.37 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.76 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.71 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.54 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 26.88 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.79 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.46 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SIN3A.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SDE2.dataset already exists SDE2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ACTB.dataset already exists ACTB adata already tokenized.


 33%|███▎      | 695/2106 [23:43<1:07:26,  2.87s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DCAF6.dataset already exists DCAF6 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/HIPK1.dataset already exists HIPK1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/METTL1.dataset already exists METTL1 adata already tokenized.


 33%|███▎      | 699/2106 [23:43<33:54,  1.45s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CHMP3.dataset already exists CHMP3 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CAD.dataset already exists CAD adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MRPL9.dataset already exists MRPL9 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]


/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/HNRNPA1.dataset already exists HNRNPA1 adata already tokenized.


  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.53 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.38 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.41 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.00 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.11 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.12 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 28.53 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 28.94 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 29.28 examples/s][A
 33%|███▎      | 699/2106 [23:58<33:54,  1.45s/it]<00:06, 29.56 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 29.80 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:12<00:04, 29.92 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RNF168.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ANKRD39.dataset already exists ANKRD39 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SRRM1.dataset already exists SRRM1 adata already tokenized.


 34%|███▎      | 706/2106 [24:04<35:43,  1.53s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/AAAS.dataset already exists AAAS adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/UBA5.dataset already exists UBA5 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ZNF407.dataset already exists ZNF407 adata already tokenized.


 34%|███▎      | 708/2106 [24:04<25:01,  1.07s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RNF31.dataset already exists RNF31 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ZC3H4.dataset already exists ZC3H4 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PHF12.dataset already exists PHF12 adata already tokenized.


 34%|███▍      | 712/2106 [24:04<12:52,  1.80it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SDHA.dataset already exists SDHA adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ELL.dataset already exists ELL adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RBM4.dataset already exists RBM4 adata already tokenized.


 34%|███▍      | 714/2106 [24:04<09:27,  2.45it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CPSF4.dataset already exists CPSF4 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/HMGCR.dataset already exists HMGCR adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TPX2.dataset already exists TPX2 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.61 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.32 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.58 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.55 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.80 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.03 examples/s][A
 34%|███▍      | 714/2106 [24:18<09:27,  2.45it/s]<00:12, 22.43 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.90 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.67 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 23.71 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 25.41 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 26.79 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/URI1.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/AASDHPPT.dataset already exists AASDHPPT adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPL26.dataset already exists RPL26 adata already tokenized.


 34%|███▍      | 719/2106 [24:28<48:51,  2.11s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPL27.dataset already exists RPL27 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.80 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.36 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.82 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.25 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.07 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.94 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.99 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.10 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.58 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.70 examples/s][A
 34%|███▍      | 719/2106 [24:48<48:51,  2.11s/it]<00:07, 21.35 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.20 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SPDL1.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPL29.dataset already exists RPL29 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.82 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.26 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.30 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 23.11 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.12 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.59 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.69 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.38 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.85 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.31 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 26.23 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:05, 24.42 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CSNK1A1.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/POP1.dataset already exists POP1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/EXOC2.dataset already exists EXOC2 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.56 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.33 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.91 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.10 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.57 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 23.89 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.65 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.86 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.90 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 23.48 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.43 examples/s][A
 34%|███▍      | 723/2106 [25:38<2:38:21,  6.87s/it]0:05, 21.78 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RBBP4.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ANAPC5.dataset already exists ANAPC5 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RAB6A.dataset already exists RAB6A adata already tokenized.


 35%|███▍      | 729/2106 [25:44<1:32:13,  4.02s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TSEN54.dataset already exists TSEN54 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ZC3H8.dataset already exists ZC3H8 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PES1.dataset already exists PES1 adata already tokenized.


 35%|███▍      | 731/2106 [25:44<1:03:12,  2.76s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/INF2.dataset already exists INF2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/THOC1.dataset already exists THOC1 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.60 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.15 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.59 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.26 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.61 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.29 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.78 examples/s][A
 35%|███▍      | 731/2106 [25:58<1:03:12,  2.76s/it]0:10, 22.88 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 21.89 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.77 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.67 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 25.29 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SNUPN.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/QRSL1.dataset already exists QRSL1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SKP2.dataset already exists SKP2 adata already tokenized.


 35%|███▍      | 737/2106 [26:08<1:01:45,  2.71s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PPP6C.dataset already exists PPP6C adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/GTPBP4.dataset already exists GTPBP4 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RBM28.dataset already exists RBM28 adata already tokenized.


 35%|███▌      | 739/2106 [26:08<43:27,  1.91s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TOX4.dataset already exists TOX4 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/EIF3D.dataset already exists EIF3D adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:02<00:29, 15.66 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:23, 18.81 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 23.08 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.63 examples/s][A
 35%|███▌      | 739/2106 [26:18<43:27,  1.91s/it]<00:12, 27.10 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.99 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 25.14 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.79 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.86 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 21.97 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.85 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 25.50 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CCT3.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/GTF2F2.dataset already exists GTF2F2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/UMPS.dataset already exists UMPS adata already tokenized.


 35%|███▌      | 745/2106 [26:31<54:09,  2.39s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CMTR2.dataset already exists CMTR2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/EEF1G.dataset already exists EEF1G adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SCAP.dataset already exists SCAP adata already tokenized.


 36%|███▌      | 749/2106 [26:31<27:16,  1.21s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DYNC1H1.dataset already exists DYNC1H1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SLC39A10.dataset already exists SLC39A10 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SMC1A.dataset already exists SMC1A adata already tokenized.


 36%|███▌      | 751/2106 [26:32<19:31,  1.16it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/VPS33A.dataset already exists VPS33A adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/KIF11.dataset already exists KIF11 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NSF.dataset already exists NSF adata already tokenized.


 36%|███▌      | 755/2106 [26:32<10:27,  2.15it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TEFM.dataset already exists TEFM adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/INO80C.dataset already exists INO80C adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/GNL3L.dataset already exists GNL3L adata already tokenized.


 36%|███▌      | 757/2106 [26:32<07:50,  2.87it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/EXOC7.dataset already exists EXOC7 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NUBP2.dataset already exists NUBP2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/COPS4.dataset already exists COPS4 adata already tokenized.


 36%|███▌      | 761/2106 [26:32<04:44,  4.72it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PELO.dataset already exists PELO adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MCM10.dataset already exists MCM10 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CCNK.dataset already exists CCNK adata already tokenized.


 36%|███▌      | 763/2106 [26:32<03:53,  5.76it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MRPL44.dataset already exists MRPL44 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MRPL38.dataset already exists MRPL38 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ZNF143.dataset already exists ZNF143 adata already tokenized.


 36%|███▋      | 765/2106 [26:33<03:16,  6.84it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PSMA7.dataset already exists PSMA7 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.94 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.03 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.71 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.45 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.83 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.66 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.86 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.50 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.38 examples/s][A
 36%|███▋      | 765/2106 [26:48<03:16,  6.84it/s]<00:06, 28.31 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 25.80 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 24.11 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ZMAT2.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/LUC7L3.dataset already exists LUC7L3 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/UFM1.dataset already exists UFM1 adata already tokenized.


 37%|███▋      | 770/2106 [26:57<45:02,  2.02s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PPIA.dataset already exists PPIA adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ARF4.dataset already exists ARF4 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RBM39.dataset already exists RBM39 adata already tokenized.


 37%|███▋      | 774/2106 [26:58<22:05,  1.01it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/WDR46.dataset already exists WDR46 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TPR.dataset already exists TPR adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ZNF720.dataset already exists ZNF720 adata already tokenized.


 37%|███▋      | 776/2106 [26:58<15:50,  1.40it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PCBP2.dataset already exists PCBP2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SNRPD1.dataset already exists SNRPD1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPL28.dataset already exists RPL28 adata already tokenized.


 37%|███▋      | 778/2106 [26:58<11:32,  1.92it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/WDR82.dataset already exists WDR82 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.04 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.44 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.99 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:16, 22.06 examples/s][A
 37%|███▋      | 778/2106 [27:08<11:32,  1.92it/s]<00:14, 24.33 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 25.91 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.07 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.92 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.59 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.23 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.27 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.70 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ARPC3.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/STIL.dataset already exists STIL adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.81 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.34 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.42 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.77 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.83 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.28 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 25.12 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.45 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.38 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.16 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.78 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.10 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPS5.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/POP4.dataset already exists POP4 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TMA16.dataset already exists TMA16 adata already tokenized.


 37%|███▋      | 786/2106 [27:44<53:00,  2.41s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RAD17.dataset already exists RAD17 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPL17.dataset already exists RPL17 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SUPT4H1.dataset already exists SUPT4H1 adata already tokenized.


 37%|███▋      | 788/2106 [27:44<36:16,  1.65s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PSMB5.dataset already exists PSMB5 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PAF1.dataset already exists PAF1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/LSM6.dataset already exists LSM6 adata already tokenized.


 38%|███▊      | 792/2106 [27:44<17:54,  1.22it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/UQCRC1.dataset already exists UQCRC1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/THOC2.dataset already exists THOC2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DIS3.dataset already exists DIS3 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.65 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.07 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.42 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.54 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.19 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:15, 20.56 examples/s][A
 38%|███▊      | 792/2106 [27:58<17:54,  1.22it/s]<00:12, 22.87 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 24.71 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.28 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 22.50 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.30 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 24.16 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/POGZ.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.49 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.39 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.66 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.02 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.92 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.87 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.94 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.11 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 23.31 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 24.93 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:05, 26.44 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 27.34 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MAU2.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CCT2.dataset already exists CCT2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/EIF3L.dataset already exists EIF3L adata already tokenized.


 38%|███▊      | 798/2106 [28:33<1:22:04,  3.76s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NCAPD2.dataset already exists NCAPD2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RTRAF.dataset already exists RTRAF adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SLC39A7.dataset already exists SLC39A7 adata already tokenized.


 38%|███▊      | 800/2106 [28:34<55:25,  2.55s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NR2C2AP.dataset already exists NR2C2AP adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.73 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.19 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.25 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.42 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.13 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.06 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.22 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.89 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.16 examples/s][A
 38%|███▊      | 800/2106 [28:48<55:25,  2.55s/it]<00:06, 29.60 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.79 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 26.75 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/C9orf16.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/UQCRFS1.dataset already exists UQCRFS1 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.80 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.33 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 19.82 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.22 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.82 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.61 examples/s][A
 38%|███▊      | 802/2106 [29:08<1:47:35,  4.95s/it]0:11, 24.27 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.09 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.44 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 22.49 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 24.42 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.10 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RNMT.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PRPF8.dataset already exists PRPF8 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/FGFR1OP.dataset already exists FGFR1OP adata already tokenized.


 38%|███▊      | 808/2106 [29:20<57:23,  2.65s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PPA1.dataset already exists PPA1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NDUFA11.dataset already exists NDUFA11 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PRPF31.dataset already exists PRPF31 adata already tokenized.


 38%|███▊      | 810/2106 [29:21<37:56,  1.76s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/AURKB.dataset already exists AURKB adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/USP5.dataset already exists USP5 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MBNL1.dataset already exists MBNL1 adata already tokenized.


 39%|███▊      | 814/2106 [29:21<18:06,  1.19it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/POLR1E.dataset already exists POLR1E adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/METTL16.dataset already exists METTL16 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/KAT5.dataset already exists KAT5 adata already tokenized.


 39%|███▊      | 816/2106 [29:21<12:55,  1.66it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/GTF2E1.dataset already exists GTF2E1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPL8.dataset already exists RPL8 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/HEXIM1.dataset already exists HEXIM1 adata already tokenized.


 39%|███▉      | 818/2106 [29:21<09:28,  2.26it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/INTS5.dataset already exists INTS5 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.03 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.39 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.35 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.59 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.24 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.03 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.04 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 20.88 examples/s][A
 39%|███▉      | 818/2106 [29:38<09:28,  2.26it/s]<00:10, 20.80 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:15<00:08, 20.81 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 20.21 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 22.52 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/EIF3G.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MZT1.dataset already exists MZT1 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.43 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 19.98 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.58 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.73 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.69 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.07 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 23.21 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 24.89 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.22 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.14 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.90 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.38 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MRPS18B.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PPP1CB.dataset already exists PPP1CB adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CDC42.dataset already exists CDC42 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/YEATS4.dataset already exists YEATS4 adata already tokenized.
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:02<00:29, 15.67 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:23, 18.70 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:05<00:20, 20.09 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 20.65 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.89 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.39 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 25.17 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.74 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.36 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 23.88 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.28 examples/s][A
 39%|███▉      | 823/2106 [30:28<1:41:46,  4.76s/it]0:05, 22.99 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NOP58.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/E2F6.dataset already exists E2F6 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PSMD6.dataset already exists PSMD6 adata already tokenized.


 39%|███▉      | 829/2106 [30:34<1:13:14,  3.44s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ERH.dataset already exists ERH adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NAA20.dataset already exists NAA20 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/LRP5.dataset already exists LRP5 adata already tokenized.


 39%|███▉      | 831/2106 [30:34<51:17,  2.41s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/BAP1.dataset already exists BAP1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MCEE.dataset already exists MCEE adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.95 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.24 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.14 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.10 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 22.65 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.92 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.60 examples/s][A
 39%|███▉      | 831/2106 [30:48<51:17,  2.41s/it]<00:11, 20.75 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.65 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:09, 20.61 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 20.78 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 20.84 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TRA2B.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CPOX.dataset already exists CPOX adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SOD1.dataset already exists SOD1 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.26 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.81 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.48 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 20.67 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.69 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.15 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 23.37 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.42 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 21.80 examples/s][A
 40%|███▉      | 835/2106 [31:18<1:25:04,  4.02s/it]0:07, 23.36 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 25.02 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 26.16 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/KPNB1.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CDIPT.dataset already exists CDIPT adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/EXOC4.dataset already exists EXOC4 adata already tokenized.


 40%|███▉      | 840/2106 [31:24<1:11:52,  3.41s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/GFOD2.dataset already exists GFOD2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DDX24.dataset already exists DDX24 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.39 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.26 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.49 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:14, 25.75 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:14, 23.76 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:13, 22.80 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.55 examples/s][A
 40%|███▉      | 840/2106 [31:38<1:11:52,  3.41s/it]0:11, 22.18 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:10, 21.62 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 21.38 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.56 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.54 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RNPC3.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/FAM207A.dataset already exists FAM207A adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CXXC1.dataset already exists CXXC1 adata already tokenized.


 40%|████      | 846/2106 [31:49<50:28,  2.40s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/GAR1.dataset already exists GAR1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SMC4.dataset already exists SMC4 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SPTLC1.dataset already exists SPTLC1 adata already tokenized.


 40%|████      | 848/2106 [31:49<34:10,  1.63s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/GTF3C2.dataset already exists GTF3C2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/AARS.dataset already exists AARS adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CHAF1B.dataset already exists CHAF1B adata already tokenized.


 40%|████      | 852/2106 [31:50<16:42,  1.25it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MRPS7.dataset already exists MRPS7 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPL24.dataset already exists RPL24 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MTOR.dataset already exists MTOR adata already tokenized.


 41%|████      | 854/2106 [31:50<12:01,  1.73it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TOE1.dataset already exists TOE1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PXN.dataset already exists PXN adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RBMXL1.dataset already exists RBMXL1 adata already tokenized.


 41%|████      | 858/2106 [31:50<06:34,  3.17it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SENP6.dataset already exists SENP6 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/POLR2I.dataset already exists POLR2I adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/BUD31.dataset already exists BUD31 adata already tokenized.


 41%|████      | 860/2106 [31:50<05:05,  4.07it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CTBP2.dataset already exists CTBP2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/INTS1.dataset already exists INTS1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SEPHS2.dataset already exists SEPHS2 adata already tokenized.


 41%|████      | 862/2106 [31:50<04:04,  5.08it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/EXOSC2.dataset already exists EXOSC2 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.86 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.18 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.77 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 20.07 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.45 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.86 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.76 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.29 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.38 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 28.16 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 28.56 examples/s][A
 41%|████      | 862/2106 [32:08<04:04,  5.08it/s]<00:04, 29.21 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/H2AFZ.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.80 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.37 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.95 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.12 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.02 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.98 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.01 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.37 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 23.44 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 25.07 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 25.71 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 26.49 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TM7SF2.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PRPF19.dataset already exists PRPF19 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PABPC4.dataset already exists PABPC4 adata already tokenized.


 41%|████▏     | 869/2106 [32:37<52:16,  2.54s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/THAP1.dataset already exists THAP1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DDX20.dataset already exists DDX20 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/EWSR1.dataset already exists EWSR1 adata already tokenized.


 41%|████▏     | 871/2106 [32:37<35:06,  1.71s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/C1orf109.dataset already exists C1orf109 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPL14.dataset already exists RPL14 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MRPL46.dataset already exists MRPL46 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.77 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.37 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.98 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.07 examples/s][A
 41%|████▏     | 871/2106 [32:48<35:06,  1.71s/it]<00:16, 21.09 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.04 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.08 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.39 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.42 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.22 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 21.26 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.01 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PDPK1.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MYCBP.dataset already exists MYCBP adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ZCCHC9.dataset already exists ZCCHC9 adata already tokenized.


 42%|████▏     | 878/2106 [33:05<40:49,  1.99s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/UCHL5.dataset already exists UCHL5 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TMEM161B.dataset already exists TMEM161B adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/FUNDC2.dataset already exists FUNDC2 adata already tokenized.


 42%|████▏     | 880/2106 [33:05<28:09,  1.38s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RNF20.dataset already exists RNF20 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DPY19L4.dataset already exists DPY19L4 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CTDP1.dataset already exists CTDP1 adata already tokenized.


 42%|████▏     | 882/2106 [33:05<19:47,  1.03it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CENPH.dataset already exists CENPH adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/METTL23.dataset already exists METTL23 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 16.91 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.36 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 23.30 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.82 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.08 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.80 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.46 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.02 examples/s][A
 42%|████▏     | 882/2106 [33:19<19:47,  1.03it/s]<00:07, 29.12 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.56 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.69 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.85 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CDCA8.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SRFBP1.dataset already exists SRFBP1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPL10A.dataset already exists RPL10A adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RFC3.dataset already exists RFC3 adata already tokenized.
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.85 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.58 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.01 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.55 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.02 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 25.84 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.16 examples/s][A
 42%|████▏     | 886/2106 [33:39<55:22,  2.72s/it]<00:09, 26.81 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 24.97 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 23.78 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.03 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 24.95 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/KDM1A.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NFYC.dataset already exists NFYC adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/KLC2.dataset already exists KLC2 adata already tokenized.


 42%|████▏     | 893/2106 [33:48<39:53,  1.97s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/GFER.dataset already exists GFER adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/REXO2.dataset already exists REXO2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CCDC84.dataset already exists CCDC84 adata already tokenized.


 42%|████▏     | 895/2106 [33:48<27:41,  1.37s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MED6.dataset already exists MED6 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/POLR1C.dataset already exists POLR1C adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CAP1.dataset already exists CAP1 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.62 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.26 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.86 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.91 examples/s][A
 42%|████▏     | 895/2106 [33:59<27:41,  1.37s/it]<00:13, 25.78 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.12 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 27.95 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.68 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 23.98 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 23.07 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.24 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.83 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SPATA5.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MRPL27.dataset already exists MRPL27 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RBM10.dataset already exists RBM10 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CCAR1.dataset already exists CCAR1 adata already tokenized.
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.67 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.17 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.97 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.08 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.00 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.29 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.17 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.51 examples/s][A
 43%|████▎     | 899/2106 [34:29<1:07:13,  3.34s/it]0:09, 24.10 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.92 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.86 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 24.72 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CDK7.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MRPL43.dataset already exists MRPL43 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MOB4.dataset already exists MOB4 adata already tokenized.


 44%|████▍     | 934/2106 [36:15<1:11:34,  3.66s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SGPP1.dataset already exists SGPP1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/UBA3.dataset already exists UBA3 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/UBTF.dataset already exists UBTF adata already tokenized.


 45%|████▍     | 938/2106 [36:15<33:39,  1.73s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/BRD4.dataset already exists BRD4 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ORC5.dataset already exists ORC5 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TMEM214.dataset already exists TMEM214 adata already tokenized.


 45%|████▍     | 940/2106 [36:16<23:36,  1.22s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NDUFAF3.dataset already exists NDUFAF3 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CENPE.dataset already exists CENPE adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TRNT1.dataset already exists TRNT1 adata already tokenized.


 45%|████▍     | 942/2106 [36:16<16:47,  1.16it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RAD51C.dataset already exists RAD51C adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.64 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.11 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.71 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.90 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.81 examples/s][A
 45%|████▍     | 942/2106 [36:29<16:47,  1.16it/s]<00:14, 20.81 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.74 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 20.76 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.97 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:15<00:08, 21.04 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 20.38 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 22.49 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MCMBP.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NHLRC2.dataset already exists NHLRC2 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.96 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:18, 23.82 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 22.70 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.44 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.99 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.92 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.51 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.96 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.37 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.54 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.73 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.91 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CTPS1.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TEX10.dataset already exists TEX10 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NF1.dataset already exists NF1 adata already tokenized.


 45%|████▌     | 950/2106 [37:01<43:28,  2.26s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PRKCA.dataset already exists PRKCA adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/GEMIN7.dataset already exists GEMIN7 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PRIM1.dataset already exists PRIM1 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 17.10 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.31 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.28 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 20.52 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.31 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:15, 20.55 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.68 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 20.81 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 22.93 examples/s][A
 45%|████▌     | 950/2106 [37:19<43:28,  2.26s/it]<00:07, 24.78 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:05, 26.12 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 27.29 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RGP1.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/VPS13D.dataset already exists VPS13D adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SBDS.dataset already exists SBDS adata already tokenized.


 45%|████▌     | 955/2106 [37:25<53:12,  2.77s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NUP54.dataset already exists NUP54 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CCDC59.dataset already exists CCDC59 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/WAC.dataset already exists WAC adata already tokenized.


 45%|████▌     | 957/2106 [37:25<36:00,  1.88s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ANAPC1.dataset already exists ANAPC1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/COQ4.dataset already exists COQ4 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.91 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.28 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.01 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.53 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.96 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.67 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.99 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.62 examples/s][A
 45%|████▌     | 957/2106 [37:39<36:00,  1.88s/it]<00:07, 27.65 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.47 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 25.26 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.98 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NCKAP1.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/AATF.dataset already exists AATF adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RAD51D.dataset already exists RAD51D adata already tokenized.


 46%|████▌     | 964/2106 [37:49<35:14,  1.85s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/KAT8.dataset already exists KAT8 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TIMM9.dataset already exists TIMM9 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ZC3H13.dataset already exists ZC3H13 adata already tokenized.


 46%|████▌     | 966/2106 [37:50<24:21,  1.28s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TTF2.dataset already exists TTF2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PRMT5.dataset already exists PRMT5 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ACTR10.dataset already exists ACTR10 adata already tokenized.


 46%|████▌     | 970/2106 [37:50<12:16,  1.54it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TTI2.dataset already exists TTI2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SDHD.dataset already exists SDHD adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TRAPPC4.dataset already exists TRAPPC4 adata already tokenized.


 46%|████▌     | 972/2106 [37:50<08:57,  2.11it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PRPF40A.dataset already exists PRPF40A adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SNRPD2.dataset already exists SNRPD2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/UBR4.dataset already exists UBR4 adata already tokenized.


 46%|████▋     | 976/2106 [37:50<05:08,  3.67it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPE.dataset already exists RPE adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SF3B1.dataset already exists SF3B1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/POLR1D.dataset already exists POLR1D adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.04 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.24 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.00 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.21 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.05 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.33 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 23.49 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 25.28 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.74 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.60 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 28.50 examples/s][A
 46%|████▋     | 976/2106 [38:09<05:08,  3.67it/s]<00:04, 29.09 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NDUFA8.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CHCHD3.dataset already exists CHCHD3 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SNAPC1.dataset already exists SNAPC1 adata already tokenized.


 47%|████▋     | 981/2106 [38:14<38:03,  2.03s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DROSHA.dataset already exists DROSHA adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/OSBP.dataset already exists OSBP adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/GPN1.dataset already exists GPN1 adata already tokenized.


 47%|████▋     | 983/2106 [38:15<26:27,  1.41s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DIMT1.dataset already exists DIMT1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ORC4.dataset already exists ORC4 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.54 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.33 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.72 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.13 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.34 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.71 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.47 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 27.00 examples/s][A
 47%|████▋     | 983/2106 [38:29<26:27,  1.41s/it]<00:07, 27.85 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.59 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.03 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.39 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/GET3.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PTMA.dataset already exists PTMA adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CNOT3.dataset already exists CNOT3 adata already tokenized.


 47%|████▋     | 989/2106 [38:37<39:44,  2.13s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/VPS29.dataset already exists VPS29 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DERL2.dataset already exists DERL2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SON.dataset already exists SON adata already tokenized.


 47%|████▋     | 993/2106 [38:37<19:57,  1.08s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/GMPPB.dataset already exists GMPPB adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SRSF7.dataset already exists SRSF7 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/COG1.dataset already exists COG1 adata already tokenized.


 47%|████▋     | 995/2106 [38:37<14:24,  1.28it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NFS1.dataset already exists NFS1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/POLD3.dataset already exists POLD3 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/OIP5.dataset already exists OIP5 adata already tokenized.


 47%|████▋     | 999/2106 [38:37<07:38,  2.41it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SLC25A3.dataset already exists SLC25A3 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPIA.dataset already exists RPIA adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TRPM7.dataset already exists TRPM7 adata already tokenized.


 48%|████▊     | 1001/2106 [38:38<05:48,  3.17it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SOD2.dataset already exists SOD2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NACA.dataset already exists NACA adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ATP6V0D1.dataset already exists ATP6V0D1 adata already tokenized.


 48%|████▊     | 1003/2106 [38:38<04:30,  4.08it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SARS2.dataset already exists SARS2 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.15 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:17, 24.66 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:14, 27.43 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:13, 27.04 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 24.66 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:13, 23.29 examples/s][A
 48%|████▊     | 1003/2106 [38:49<04:30,  4.08it/s]00:12, 22.63 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:11, 22.13 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 24.13 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 25.75 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 27.10 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 27.86 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PPIL4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.05 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.40 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.96 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.35 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.22 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.24 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.14 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.10 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.05 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.11 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 21.03 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:18<00:06, 20.37 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DNAJC11.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ECD.dataset already exists ECD adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NCL.dataset already exists NCL adata already tokenized.


 48%|████▊     | 1010/2106 [39:24<46:39,  2.55s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/EIF4G2.dataset already exists EIF4G2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PPP2R1A.dataset already exists PPP2R1A adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TWNK.dataset already exists TWNK adata already tokenized.


 48%|████▊     | 1012/2106 [39:24<31:21,  1.72s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CEP192.dataset already exists CEP192 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TBCA.dataset already exists TBCA adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PLK1.dataset already exists PLK1 adata already tokenized.


 48%|████▊     | 1016/2106 [39:25<15:12,  1.19it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ANAPC15.dataset already exists ANAPC15 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MRPL19.dataset already exists MRPL19 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PFDN1.dataset already exists PFDN1 adata already tokenized.


 48%|████▊     | 1018/2106 [39:25<10:53,  1.66it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CCNA2.dataset already exists CCNA2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ILF3.dataset already exists ILF3 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PRELID1.dataset already exists PRELID1 adata already tokenized.


 49%|████▊     | 1022/2106 [39:25<06:00,  3.01it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CHTOP.dataset already exists CHTOP adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ERCC3.dataset already exists ERCC3 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CASP8AP2.dataset already exists CASP8AP2 adata already tokenized.


 49%|████▊     | 1024/2106 [39:25<04:38,  3.89it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SYF2.dataset already exists SYF2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TRAPPC8.dataset already exists TRAPPC8 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPL35A.dataset already exists RPL35A adata already tokenized.


 49%|████▉     | 1028/2106 [39:26<03:00,  5.99it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPL23.dataset already exists RPL23 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MRPS28.dataset already exists MRPS28 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ATP6V1H.dataset already exists ATP6V1H adata already tokenized.


 49%|████▉     | 1030/2106 [39:26<02:32,  7.04it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CDC73.dataset already exists CDC73 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CTR9.dataset already exists CTR9 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PSMD12.dataset already exists PSMD12 adata already tokenized.


 49%|████▉     | 1034/2106 [39:26<01:59,  9.00it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ALDOA.dataset already exists ALDOA adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/UPF2.dataset already exists UPF2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MRPL1.dataset already exists MRPL1 adata already tokenized.


 49%|████▉     | 1036/2106 [39:26<01:49,  9.77it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/BARD1.dataset already exists BARD1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RNF103.dataset already exists RNF103 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MYBL2.dataset already exists MYBL2 adata already tokenized.


 49%|████▉     | 1038/2106 [39:27<01:43, 10.33it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPL38.dataset already exists RPL38 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/BUB3.dataset already exists BUB3 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.16 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.06 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.76 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.21 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.02 examples/s][A
 49%|████▉     | 1038/2106 [39:39<01:43, 10.33it/s]00:14, 20.96 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.92 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 20.87 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.97 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.63 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.75 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 25.46 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MRPL54.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TIMM22.dataset already exists TIMM22 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NUP214.dataset already exists NUP214 adata already tokenized.


 50%|████▉     | 1045/2106 [39:51<24:56,  1.41s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NEPRO.dataset already exists NEPRO adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RCCD1.dataset already exists RCCD1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/KDM2A.dataset already exists KDM2A adata already tokenized.


 50%|████▉     | 1047/2106 [39:51<17:28,  1.01it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NUF2.dataset already exists NUF2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/FARSA.dataset already exists FARSA adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/KCMF1.dataset already exists KCMF1 adata already tokenized.


 50%|████▉     | 1051/2106 [39:51<09:01,  1.95it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/YARS.dataset already exists YARS adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TARS.dataset already exists TARS adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ARPC4.dataset already exists ARPC4 adata already tokenized.


 50%|█████     | 1053/2106 [39:52<06:41,  2.62it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/HSPD1.dataset already exists HSPD1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CMPK1.dataset already exists CMPK1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PNISR.dataset already exists PNISR adata already tokenized.


 50%|█████     | 1057/2106 [39:52<03:56,  4.43it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/YTHDC1.dataset already exists YTHDC1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/KIF23.dataset already exists KIF23 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPS19.dataset already exists RPS19 adata already tokenized.


 50%|█████     | 1059/2106 [39:52<03:11,  5.47it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/FARSB.dataset already exists FARSB adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/GABPA.dataset already exists GABPA adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/HNRNPR.dataset already exists HNRNPR adata already tokenized.


 50%|█████     | 1061/2106 [39:52<02:40,  6.51it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MRPL24.dataset already exists MRPL24 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MYC.dataset already exists MYC adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 16.95 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.69 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 23.73 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.12 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 24.69 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.57 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.93 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.94 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.07 examples/s][A
 50%|█████     | 1061/2106 [40:09<02:40,  6.51it/s]00:08, 22.29 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 22.06 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 24.06 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPAP1.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NAE1.dataset already exists NAE1 adata already tokenized.


 51%|█████     | 1066/2106 [40:16<34:44,  2.00s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MMS22L.dataset already exists MMS22L adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NUDT21.dataset already exists NUDT21 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.68 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.10 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.65 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.97 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.77 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.69 examples/s][A
 51%|█████     | 1066/2106 [40:29<34:44,  2.00s/it]00:13, 20.75 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 20.82 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.00 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:15<00:09, 20.29 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:06, 22.53 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 24.36 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TEAD3.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NDUFB3.dataset already exists NDUFB3 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MRPL35.dataset already exists MRPL35 adata already tokenized.


 51%|█████     | 1072/2106 [40:40<35:01,  2.03s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DDB1.dataset already exists DDB1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CS.dataset already exists CS adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CAMLG.dataset already exists CAMLG adata already tokenized.


 51%|█████     | 1074/2106 [40:41<23:40,  1.38s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/GTF2H3.dataset already exists GTF2H3 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TYK2.dataset already exists TYK2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/GRWD1.dataset already exists GRWD1 adata already tokenized.


 51%|█████     | 1078/2106 [40:41<11:34,  1.48it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PIAS1.dataset already exists PIAS1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/EMC1.dataset already exists EMC1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPP40.dataset already exists RPP40 adata already tokenized.


 51%|█████▏    | 1080/2106 [40:41<08:25,  2.03it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/JAZF1.dataset already exists JAZF1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TXNL4A.dataset already exists TXNL4A adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/VEZT.dataset already exists VEZT adata already tokenized.


 51%|█████▏    | 1084/2106 [40:41<04:45,  3.58it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/LIMS1.dataset already exists LIMS1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CLOCK.dataset already exists CLOCK adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CASC3.dataset already exists CASC3 adata already tokenized.


 52%|█████▏    | 1086/2106 [40:42<03:44,  4.55it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RAN.dataset already exists RAN adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RSL24D1.dataset already exists RSL24D1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SRRT.dataset already exists SRRT adata already tokenized.


 52%|█████▏    | 1088/2106 [40:42<03:00,  5.63it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/LSM7.dataset already exists LSM7 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CCDC137.dataset already exists CCDC137 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:22, 20.83 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:16, 25.84 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:14, 27.97 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 29.15 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 29.55 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 29.68 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 29.85 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 29.94 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 29.99 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 30.05 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 30.12 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:12<00:04, 30.11 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PSMD4.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MCM6.dataset already exists MCM6 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/KLF7.dataset already exists KLF7 adata already tokenized.


 52%|█████▏    | 1094/2106 [41:02<26:40,  1.58s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CYC1.dataset already exists CYC1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MRPL42.dataset already exists MRPL42 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ZRANB2.dataset already exists ZRANB2 adata already tokenized.


 52%|█████▏    | 1098/2106 [41:02<13:43,  1.22it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/THOC3.dataset already exists THOC3 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/KARS.dataset already exists KARS adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DNAJC9.dataset already exists DNAJC9 adata already tokenized.


 52%|█████▏    | 1100/2106 [41:03<10:00,  1.67it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SETDB1.dataset already exists SETDB1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ELOVL1.dataset already exists ELOVL1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PYROXD1.dataset already exists PYROXD1 adata already tokenized.


 52%|█████▏    | 1104/2106 [41:03<05:34,  3.00it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NUP107.dataset already exists NUP107 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ATP5MD.dataset already exists ATP5MD adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NASP.dataset already exists NASP adata already tokenized.


 53%|█████▎    | 1106/2106 [41:03<04:19,  3.86it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/INCENP.dataset already exists INCENP adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NARS.dataset already exists NARS adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MRPL36.dataset already exists MRPL36 adata already tokenized.


 53%|█████▎    | 1108/2106 [41:03<03:25,  4.85it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PYURF.dataset already exists PYURF adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.78 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.37 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.83 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.13 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.04 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.76 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.12 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.16 examples/s][A
 53%|█████▎    | 1108/2106 [41:19<03:25,  4.85it/s]00:08, 25.09 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 26.35 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.37 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.17 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TAZ.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.75 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.54 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.06 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.14 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.43 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.54 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.44 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 24.81 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 23.45 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.48 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:07, 21.94 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.80 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CALR.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/WDR70.dataset already exists WDR70 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DNM1.dataset already exists DNM1 adata already tokenized.


 53%|█████▎    | 1115/2106 [41:50<41:10,  2.49s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TXN.dataset already exists TXN adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CHMP1A.dataset already exists CHMP1A adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/UTP11.dataset already exists UTP11 adata already tokenized.


 53%|█████▎    | 1117/2106 [41:50<27:39,  1.68s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MAGOH.dataset already exists MAGOH adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPA3.dataset already exists RPA3 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TRIAP1.dataset already exists TRIAP1 adata already tokenized.


 53%|█████▎    | 1121/2106 [41:50<13:23,  1.23it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/EXOC1.dataset already exists EXOC1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PLRG1.dataset already exists PLRG1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/VPS51.dataset already exists VPS51 adata already tokenized.


 53%|█████▎    | 1123/2106 [41:50<09:36,  1.71it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/LTBP3.dataset already exists LTBP3 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DAXX.dataset already exists DAXX adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.64 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.24 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.23 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.21 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.96 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.69 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.66 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.02 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:10, 21.56 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.27 examples/s][A
 53%|█████▎    | 1123/2106 [42:09<09:36,  1.71it/s]00:07, 21.07 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.45 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CLCC1.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CHD4.dataset already exists CHD4 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MTPAP.dataset already exists MTPAP adata already tokenized.


 54%|█████▎    | 1129/2106 [42:17<28:43,  1.76s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/COPS5.dataset already exists COPS5 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CDC20.dataset already exists CDC20 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ITGB1BP1.dataset already exists ITGB1BP1 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]


/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TONSL.dataset already exists TONSL adata already tokenized.


  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:17, 26.06 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.66 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.92 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.36 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.40 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.32 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 30.28 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 30.26 examples/s][A
 54%|█████▎    | 1129/2106 [42:29<28:43,  1.76s/it]00:07, 30.01 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 30.19 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 30.26 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:12<00:04, 30.20 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/AURKAIP1.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/COPS2.dataset already exists COPS2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TMED10.dataset already exists TMED10 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.84 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.09 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 22.74 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.31 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.86 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.78 examples/s][A
 54%|█████▍    | 1132/2106 [42:49<1:01:14,  3.77s/it]:09, 28.41 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.88 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.29 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.52 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.59 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.68 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DAD1.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/AURKA.dataset already exists AURKA adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DDX18.dataset already exists DDX18 adata already tokenized.


 54%|█████▍    | 1139/2106 [42:59<34:07,  2.12s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MRPL55.dataset already exists MRPL55 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TRMT6.dataset already exists TRMT6 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TACC3.dataset already exists TACC3 adata already tokenized.


 54%|█████▍    | 1141/2106 [43:00<23:17,  1.45s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/POLR2A.dataset already exists POLR2A adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MAT2A.dataset already exists MAT2A adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.06 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.43 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.17 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:16, 21.94 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.33 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 26.01 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.22 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.02 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.14 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.00 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 25.22 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.64 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CHAF1A.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/KXD1.dataset already exists KXD1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPS2.dataset already exists RPS2 adata already tokenized.


 54%|█████▍    | 1147/2106 [43:23<28:35,  1.79s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SNRNP27.dataset already exists SNRNP27 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ATP5PD.dataset already exists ATP5PD adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SLC1A5.dataset already exists SLC1A5 adata already tokenized.


 55%|█████▍    | 1149/2106 [43:23<19:25,  1.22s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TWF1.dataset already exists TWF1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/UQCRH.dataset already exists UQCRH adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CPSF3.dataset already exists CPSF3 adata already tokenized.


 55%|█████▍    | 1153/2106 [43:23<09:41,  1.64it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RTCB.dataset already exists RTCB adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SMUG1.dataset already exists SMUG1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MED30.dataset already exists MED30 adata already tokenized.


 55%|█████▍    | 1155/2106 [43:23<07:05,  2.24it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SETD1A.dataset already exists SETD1A adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MRPS16.dataset already exists MRPS16 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.59 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.37 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.56 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.10 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.31 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.15 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 30.16 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 30.01 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 30.04 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 30.17 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 26.72 examples/s][A
 55%|█████▍    | 1155/2106 [43:40<07:05,  2.24it/s]00:05, 24.52 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/EHMT2.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ATIC.dataset already exists ATIC adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SUDS3.dataset already exists SUDS3 adata already tokenized.


 55%|█████▌    | 1161/2106 [43:45<22:57,  1.46s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/FEN1.dataset already exists FEN1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DBF4.dataset already exists DBF4 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CBX1.dataset already exists CBX1 adata already tokenized.


 55%|█████▌    | 1163/2106 [43:45<15:44,  1.00s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/HPS5.dataset already exists HPS5 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/XRCC2.dataset already exists XRCC2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PNN.dataset already exists PNN adata already tokenized.


 55%|█████▌    | 1167/2106 [43:46<07:58,  1.96it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ZC3H18.dataset already exists ZC3H18 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/GTF2H4.dataset already exists GTF2H4 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MRPL32.dataset already exists MRPL32 adata already tokenized.


 56%|█████▌    | 1169/2106 [43:46<05:50,  2.67it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RFC4.dataset already exists RFC4 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/HAUS4.dataset already exists HAUS4 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.01 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:17, 24.33 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:14, 26.95 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 28.68 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 29.25 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 29.48 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 29.79 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 30.03 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:08, 26.96 examples/s][A
 56%|█████▌    | 1169/2106 [44:00<05:50,  2.67it/s]00:07, 24.67 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 23.46 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:05, 22.69 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CSNK2B.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MFN2.dataset already exists MFN2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/UFL1.dataset already exists UFL1 adata already tokenized.


 56%|█████▌    | 1175/2106 [44:08<22:07,  1.43s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/WDHD1.dataset already exists WDHD1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SHC1.dataset already exists SHC1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CSDE1.dataset already exists CSDE1 adata already tokenized.


 56%|█████▌    | 1177/2106 [44:08<15:12,  1.02it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SRP19.dataset already exists SRP19 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SAP30BP.dataset already exists SAP30BP adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TOMM20.dataset already exists TOMM20 adata already tokenized.


 56%|█████▌    | 1181/2106 [44:08<07:43,  2.00it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TP53I13.dataset already exists TP53I13 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/METAP2.dataset already exists METAP2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ZNF236.dataset already exists ZNF236 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.73 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.35 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.90 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.48 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.89 examples/s][A
 56%|█████▌    | 1181/2106 [44:20<07:43,  2.00it/s]00:12, 25.65 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.11 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.15 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.22 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.09 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.24 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.20 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SMG5.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/COG2.dataset already exists COG2 adata already tokenized.


 56%|█████▋    | 1185/2106 [44:33<38:48,  2.53s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ELP6.dataset already exists ELP6 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ABCB7.dataset already exists ABCB7 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPL12.dataset already exists RPL12 adata already tokenized.


 56%|█████▋    | 1189/2106 [44:34<17:32,  1.15s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NELFCD.dataset already exists NELFCD adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/GART.dataset already exists GART adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PDAP1.dataset already exists PDAP1 adata already tokenized.


 57%|█████▋    | 1191/2106 [44:34<12:14,  1.25it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/BOD1L1.dataset already exists BOD1L1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/KIAA0586.dataset already exists KIAA0586 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NUP62.dataset already exists NUP62 adata already tokenized.


 57%|█████▋    | 1193/2106 [44:34<08:46,  1.73it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CYFIP1.dataset already exists CYFIP1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/UXS1.dataset already exists UXS1 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.85 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.23 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.82 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.11 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.00 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.72 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.81 examples/s][A
 57%|█████▋    | 1193/2106 [44:50<08:46,  1.73it/s]00:11, 21.15 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.00 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:15<00:08, 20.87 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 20.90 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:18<00:05, 20.92 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PHF5A.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/LTBP4.dataset already exists LTBP4 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PPME1.dataset already exists PPME1 adata already tokenized.


 57%|█████▋    | 1200/2106 [45:00<24:59,  1.65s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/AC118549.dataset already exists AC118549.1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TTC4.dataset already exists TTC4 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TRAPPC3.dataset already exists TRAPPC3 adata already tokenized.


 57%|█████▋    | 1202/2106 [45:01<17:23,  1.15s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PCNA.dataset already exists PCNA adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TAF11.dataset already exists TAF11 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/INTS7.dataset already exists INTS7 adata already tokenized.


 57%|█████▋    | 1206/2106 [45:01<08:50,  1.70it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/COG3.dataset already exists COG3 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/COX6C.dataset already exists COX6C adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NOP56.dataset already exists NOP56 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]


/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DPH3.dataset already exists DPH3 adata already tokenized.


  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.77 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.25 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.85 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.16 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.12 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.64 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.46 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.08 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.06 examples/s][A
 57%|█████▋    | 1206/2106 [45:20<08:50,  1.70it/s]00:08, 21.21 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 21.30 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.21 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TSR2.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ATP6V1E1.dataset already exists ATP6V1E1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SNX15.dataset already exists SNX15 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.70 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.13 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.62 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.98 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.58 examples/s][A
 57%|█████▋    | 1209/2106 [45:40<54:51,  3.67s/it]00:15, 20.66 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.52 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 22.56 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 22.12 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.78 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 21.37 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.30 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ZMAT5.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/UTP23.dataset already exists UTP23 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.97 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.15 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 22.77 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.34 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.68 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.63 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.36 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.87 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.22 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.54 examples/s][A
 58%|█████▊    | 1212/2106 [46:10<1:23:29,  5.60s/it]:05, 29.68 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 26.41 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RBM22.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SMNDC1.dataset already exists SMNDC1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/GTF3C5.dataset already exists GTF3C5 adata already tokenized.


 58%|█████▊    | 1218/2106 [46:17<40:40,  2.75s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/INTS6.dataset already exists INTS6 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/UNC45A.dataset already exists UNC45A adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/POLR1B.dataset already exists POLR1B adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.93 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.66 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.76 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.20 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.21 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.07 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 30.22 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 30.16 examples/s][A
 58%|█████▊    | 1218/2106 [46:30<40:40,  2.75s/it]00:07, 30.12 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 30.06 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 30.30 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:12<00:04, 30.21 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TIMM23B.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/COQ5.dataset already exists COQ5 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/STX5.dataset already exists STX5 adata already tokenized.


 58%|█████▊    | 1224/2106 [46:37<28:10,  1.92s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/EFR3A.dataset already exists EFR3A adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SNRPF.dataset already exists SNRPF adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/IPO11.dataset already exists IPO11 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPS21.dataset already exists RPS21 adata already tokenized.
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.62 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.02 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.82 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 20.53 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.02 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.95 examples/s][A
 58%|█████▊    | 1224/2106 [46:50<28:10,  1.92s/it]00:10, 26.40 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 24.49 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.32 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.30 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.83 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.92 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TFB2M.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ZNHIT1.dataset already exists ZNHIT1 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.85 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.23 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.91 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.99 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.93 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.18 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.09 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.59 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.27 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.60 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.74 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.75 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NPM1.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/BANF1.dataset already exists BANF1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/COPE.dataset already exists COPE adata already tokenized.


 59%|█████▊    | 1233/2106 [47:23<34:11,  2.35s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TUBGCP2.dataset already exists TUBGCP2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/REV3L.dataset already exists REV3L adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TPRKB.dataset already exists TPRKB adata already tokenized.


 59%|█████▊    | 1235/2106 [47:23<22:48,  1.57s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/COASY.dataset already exists COASY adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/EIF4H.dataset already exists EIF4H adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TADA3.dataset already exists TADA3 adata already tokenized.


 59%|█████▉    | 1239/2106 [47:23<11:00,  1.31it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SNRNP40.dataset already exists SNRNP40 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MED8.dataset already exists MED8 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TIMM10.dataset already exists TIMM10 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.80 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.20 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.76 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.10 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.11 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.13 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.43 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.34 examples/s][A
 59%|█████▉    | 1239/2106 [47:40<11:00,  1.31it/s]00:10, 21.34 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.72 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.91 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 25.54 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DNM1L.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/VARS.dataset already exists VARS adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CENPK.dataset already exists CENPK adata already tokenized.


 59%|█████▉    | 1245/2106 [47:49<25:10,  1.75s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/METTL14.dataset already exists METTL14 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CCND1.dataset already exists CCND1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PAM16.dataset already exists PAM16 adata already tokenized.


 59%|█████▉    | 1247/2106 [47:49<17:08,  1.20s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PFDN4.dataset already exists PFDN4 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TMEM127.dataset already exists TMEM127 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ATR.dataset already exists ATR adata already tokenized.


 59%|█████▉    | 1251/2106 [47:49<08:32,  1.67it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RBM48.dataset already exists RBM48 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DCUN1D5.dataset already exists DCUN1D5 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/POLE.dataset already exists POLE adata already tokenized.


 59%|█████▉    | 1253/2106 [47:49<06:14,  2.28it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/POLR2B.dataset already exists POLR2B adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/GBF1.dataset already exists GBF1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/WBP1L.dataset already exists WBP1L adata already tokenized.


 60%|█████▉    | 1257/2106 [47:50<03:35,  3.94it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/C19orf25.dataset already exists C19orf25 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NDUFS5.dataset already exists NDUFS5 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPL36.dataset already exists RPL36 adata already tokenized.


 60%|█████▉    | 1259/2106 [47:50<02:50,  4.97it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/AGBL5.dataset already exists AGBL5 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MRPS26.dataset already exists MRPS26 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RBM8A.dataset already exists RBM8A adata already tokenized.


 60%|█████▉    | 1263/2106 [47:50<01:57,  7.20it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPL6.dataset already exists RPL6 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPS26.dataset already exists RPS26 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/EIF2B2.dataset already exists EIF2B2 adata already tokenized.


 60%|██████    | 1265/2106 [47:50<01:41,  8.25it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SLC39A9.dataset already exists SLC39A9 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CCP110.dataset already exists CCP110 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/YBX1.dataset already exists YBX1 adata already tokenized.


 60%|██████    | 1269/2106 [47:50<01:23,  9.97it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/KDM6A.dataset already exists KDM6A adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PSMC2.dataset already exists PSMC2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PSMD2.dataset already exists PSMD2 adata already tokenized.


 60%|██████    | 1271/2106 [47:51<01:19, 10.50it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PRIM2.dataset already exists PRIM2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ATP6V0B.dataset already exists ATP6V0B adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MAK16.dataset already exists MAK16 adata already tokenized.


 61%|██████    | 1275/2106 [47:51<01:12, 11.53it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TMED2.dataset already exists TMED2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PELP1.dataset already exists PELP1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/WDR3.dataset already exists WDR3 adata already tokenized.


 61%|██████    | 1277/2106 [47:51<01:10, 11.78it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RFT1.dataset already exists RFT1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/GTF3C1.dataset already exists GTF3C1 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.68 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.10 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.74 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 19.88 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.14 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:15, 20.44 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.54 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 20.76 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 22.85 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 24.62 examples/s][A
 61%|██████    | 1277/2106 [48:10<01:10, 11.78it/s]00:05, 26.04 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 24.25 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TCOF1.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/HSPA9.dataset already exists HSPA9 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/HYOU1.dataset already exists HYOU1 adata already tokenized.


 61%|██████    | 1283/2106 [48:17<21:44,  1.58s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DYNLL2.dataset already exists DYNLL2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/UTP18.dataset already exists UTP18 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DNAJA1.dataset already exists DNAJA1 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/WDR74.dataset already exists WDR74 adata already tokenized.
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.74 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.12 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.64 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.72 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 22.79 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.95 examples/s][A
 61%|██████    | 1283/2106 [48:30<21:44,  1.58s/it]00:12, 21.47 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.10 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.99 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 22.36 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 24.19 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 25.64 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ZNF84.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/WDR43.dataset already exists WDR43 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PPP1R15B.dataset already exists PPP1R15B adata already tokenized.


 61%|██████▏   | 1290/2106 [48:41<24:12,  1.78s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RRS1.dataset already exists RRS1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RBMX.dataset already exists RBMX adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/AQR.dataset already exists AQR adata already tokenized.


 61%|██████▏   | 1292/2106 [48:41<16:42,  1.23s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MALSU1.dataset already exists MALSU1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CLASRP.dataset already exists CLASRP adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.47 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.27 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.29 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 29.71 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:13, 26.17 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 24.11 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:12, 22.73 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 22.82 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 24.57 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 23.52 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.56 examples/s][A
 61%|██████▏   | 1292/2106 [49:00<16:42,  1.23s/it]00:05, 22.65 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SMG7.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NAA38.dataset already exists NAA38 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NAA15.dataset already exists NAA15 adata already tokenized.


 62%|██████▏   | 1296/2106 [49:05<36:42,  2.72s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPS19BP1.dataset already exists RPS19BP1 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.96 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.05 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.78 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.07 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.03 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.04 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.92 examples/s][A
 62%|██████▏   | 1296/2106 [49:20<36:42,  2.72s/it]00:11, 20.96 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.01 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:15<00:08, 21.05 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 20.95 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:18<00:05, 21.02 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MYBBP1A.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PSMB4.dataset already exists PSMB4 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DHX37.dataset already exists DHX37 adata already tokenized.


 62%|██████▏   | 1302/2106 [49:31<30:39,  2.29s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ATP6AP1.dataset already exists ATP6AP1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/GOSR2.dataset already exists GOSR2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CFDP1.dataset already exists CFDP1 adata already tokenized.


 62%|██████▏   | 1304/2106 [49:32<20:27,  1.53s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPS16.dataset already exists RPS16 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MBTPS1.dataset already exists MBTPS1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ACTL6A.dataset already exists ACTL6A adata already tokenized.


 62%|██████▏   | 1308/2106 [49:32<09:50,  1.35it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ERAL1.dataset already exists ERAL1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CNOT10.dataset already exists CNOT10 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RAC1.dataset already exists RAC1 adata already tokenized.


 62%|██████▏   | 1310/2106 [49:32<07:04,  1.87it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPS14.dataset already exists RPS14 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/STRAP.dataset already exists STRAP adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MRPL37.dataset already exists MRPL37 adata already tokenized.


 62%|██████▏   | 1314/2106 [49:32<03:53,  3.39it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CACNB3.dataset already exists CACNB3 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CINP.dataset already exists CINP adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/UBE2H.dataset already exists UBE2H adata already tokenized.


 62%|██████▏   | 1316/2106 [49:32<02:59,  4.40it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PTPN23.dataset already exists PTPN23 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ATP6V1F.dataset already exists ATP6V1F adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPN2.dataset already exists RPN2 adata already tokenized.


 63%|██████▎   | 1320/2106 [49:33<02:01,  6.46it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPL27A.dataset already exists RPL27A adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PHB.dataset already exists PHB adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPS13.dataset already exists RPS13 adata already tokenized.


 63%|██████▎   | 1322/2106 [49:33<01:42,  7.64it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/FAF2.dataset already exists FAF2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ARPC1B.dataset already exists ARPC1B adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PSMA5.dataset already exists PSMA5 adata already tokenized.


 63%|██████▎   | 1326/2106 [49:33<01:21,  9.51it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PIAS4.dataset already exists PIAS4 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/GYG1.dataset already exists GYG1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CENPN.dataset already exists CENPN adata already tokenized.


 63%|██████▎   | 1328/2106 [49:33<01:15, 10.26it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/GON4L.dataset already exists GON4L adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/BANP.dataset already exists BANP adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SNRPA.dataset already exists SNRPA adata already tokenized.


 63%|██████▎   | 1332/2106 [49:34<01:08, 11.32it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/KANSL1.dataset already exists KANSL1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NDUFB7.dataset already exists NDUFB7 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TSEN34.dataset already exists TSEN34 adata already tokenized.


 63%|██████▎   | 1334/2106 [49:34<01:06, 11.54it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MIER1.dataset already exists MIER1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/COX5A.dataset already exists COX5A adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NXF1.dataset already exists NXF1 adata already tokenized.


 64%|██████▎   | 1338/2106 [49:34<01:03, 12.04it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PGS1.dataset already exists PGS1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DDX1.dataset already exists DDX1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/GNL2.dataset already exists GNL2 adata already tokenized.


 64%|██████▎   | 1340/2106 [49:34<01:03, 12.05it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MTG2.dataset already exists MTG2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NSMCE1.dataset already exists NSMCE1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPL5.dataset already exists RPL5 adata already tokenized.


 64%|██████▍   | 1344/2106 [49:35<01:03, 12.06it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/FDXR.dataset already exists FDXR adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/WDTC1.dataset already exists WDTC1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/HBS1L.dataset already exists HBS1L adata already tokenized.


 64%|██████▍   | 1346/2106 [49:35<01:03, 12.03it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SMARCE1.dataset already exists SMARCE1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ARMC6.dataset already exists ARMC6 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MCM3AP.dataset already exists MCM3AP adata already tokenized.


 64%|██████▍   | 1350/2106 [49:35<01:00, 12.40it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/YAE1.dataset already exists YAE1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MRPL51.dataset already exists MRPL51 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/YKT6.dataset already exists YKT6 adata already tokenized.


 64%|██████▍   | 1352/2106 [49:35<01:01, 12.29it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPL30.dataset already exists RPL30 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/COX10.dataset already exists COX10 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.08 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 21.12 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.76 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 21.96 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.73 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.37 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.24 examples/s][A
 64%|██████▍   | 1352/2106 [49:50<01:01, 12.29it/s]00:11, 21.72 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.85 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.41 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.89 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.63 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/COX17.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.79 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.17 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.69 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.93 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.78 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:15, 20.65 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.74 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 20.86 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.68 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:15<00:08, 20.74 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 19.84 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:18<00:06, 20.17 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PUM1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.67 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.46 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.57 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.96 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.88 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.84 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.02 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.35 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 23.51 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 25.26 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:05, 26.60 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 27.44 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/WDR61.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SRBD1.dataset already exists SRBD1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/GLI4.dataset already exists GLI4 adata already tokenized.


 64%|██████▍   | 1358/2106 [50:49<1:06:52,  5.36s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PRPF4.dataset already exists PRPF4 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 16.98 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.50 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.41 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.77 examples/s][A
 64%|██████▍   | 1358/2106 [51:00<1:06:52,  5.36s/it]:16, 20.86 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.02 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.41 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.55 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.27 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 26.45 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.43 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.40 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/COMTD1.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CAPZB.dataset already exists CAPZB adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TTC27.dataset already exists TTC27 adata already tokenized.


 65%|██████▍   | 1362/2106 [51:12<51:54,  4.19s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/HSD17B12.dataset already exists HSD17B12 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.58 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.67 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.70 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:14, 26.46 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:14, 24.28 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:13, 23.06 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.40 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.43 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 24.97 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 26.38 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 27.28 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.08 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ALG14.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ARGLU1.dataset already exists ARGLU1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CCDC6.dataset already exists CCDC6 adata already tokenized.


 65%|██████▍   | 1366/2106 [51:34<46:03,  3.73s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/LARS2.dataset already exists LARS2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPN1.dataset already exists RPN1 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.98 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.21 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.66 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.05 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.84 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.12 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.00 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 27.09 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.89 examples/s][A
 65%|██████▍   | 1366/2106 [51:50<46:03,  3.73s/it]00:06, 28.60 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.13 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.29 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/FIP1L1.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PREB.dataset already exists PREB adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPP30.dataset already exists RPP30 adata already tokenized.


 65%|██████▌   | 1371/2106 [51:58<41:12,  3.36s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DDX41.dataset already exists DDX41 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PSMD11.dataset already exists PSMD11 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:26, 17.66 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.21 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:05<00:20, 19.30 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 19.78 examples/s][A
 65%|██████▌   | 1371/2106 [52:10<41:12,  3.36s/it]00:17, 19.68 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:15, 19.64 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:11<00:14, 19.54 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:12, 19.59 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:14<00:11, 19.64 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:16<00:09, 19.58 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:17<00:07, 19.45 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:19<00:06, 19.52 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MED27.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MRPL39.dataset already exists MRPL39 adata already tokenized.


 65%|██████▌   | 1376/2106 [52:27<45:14,  3.72s/it]  

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TBL3.dataset already exists TBL3 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PRPF4B.dataset already exists PRPF4B adata already tokenized.


 65%|██████▌   | 1379/2106 [52:27<24:27,  2.02s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TRAPPC5.dataset already exists TRAPPC5 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/BTAF1.dataset already exists BTAF1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/GSPT1.dataset already exists GSPT1 adata already tokenized.


 66%|██████▌   | 1381/2106 [52:27<15:02,  1.24s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/GLRX5.dataset already exists GLRX5 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/AAR2.dataset already exists AAR2 adata already tokenized.


 66%|██████▌   | 1383/2106 [52:28<08:46,  1.37it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ZDHHC7.dataset already exists ZDHHC7 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SNAPC5.dataset already exists SNAPC5 adata already tokenized.


 66%|██████▌   | 1384/2106 [52:28<06:41,  1.80it/s]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)


Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.83 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 19.95 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.67 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.98 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.89 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.84 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.77 examples/s][A
 80%

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TBCD.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/EIF3M.dataset already exists EIF3M adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/HECTD1.dataset already exists HECTD1 adata already tokenized.


 81%|████████  | 1699/2106 [1:05:33<15:07,  2.23s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CDC23.dataset already exists CDC23 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NAGLU.dataset already exists NAGLU adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/XRN1.dataset already exists XRN1 adata already tokenized.


 81%|████████  | 1703/2106 [1:05:33<07:19,  1.09s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/HEATR1.dataset already exists HEATR1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/HSPA5.dataset already exists HSPA5 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/COX7C.dataset already exists COX7C adata already tokenized.


 81%|████████  | 1705/2106 [1:05:33<05:12,  1.28it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/UTP15.dataset already exists UTP15 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/UBQLN4.dataset already exists UBQLN4 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NKAPD1.dataset already exists NKAPD1 adata already tokenized.


 81%|████████  | 1709/2106 [1:05:34<02:46,  2.38it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/USP39.dataset already exists USP39 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TFDP1.dataset already exists TFDP1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/EXOSC7.dataset already exists EXOSC7 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/WDR4.dataset already exists WDR4 adata already tokenized.
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.70 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.16 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.79 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.40 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.78 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.48 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.70 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.70 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.43 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.94 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.25 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.29 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PRDM4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.91 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.36 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.31 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.31 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.01 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.34 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 25.09 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.21 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.51 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 21.88 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.60 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.56 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NBAS.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/COX15.dataset already exists COX15 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/VPS41.dataset already exists VPS41 adata already tokenized.


 82%|████████▏ | 1717/2106 [1:06:19<15:40,  2.42s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/GTF2A2.dataset already exists GTF2A2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/LAMB1.dataset already exists LAMB1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TAF13.dataset already exists TAF13 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.58 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 19.99 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.67 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.57 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 22.47 examples/s][A
 82%|████████▏ | 1717/2106 [1:06:32<15:40,  2.42s/it]:14, 21.48 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.33 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:12, 20.38 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.32 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:15<00:09, 20.34 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 20.29 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:18<00:06, 20.43 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SNIP1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.90 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.42 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.94 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.11 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.03 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.95 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.92 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.05 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.16 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:15<00:08, 21.15 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 21.08 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.12 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPS27.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/POLD2.dataset already exists POLD2 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.01 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.37 examples/s][A
 83%|████████▎ | 1742/2106 [1:09:10<11:41,  1.93s/it]19, 20.98 examples/s][A

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CHORDC1.dataset already exists CHORDC1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DRG1.dataset already exists DRG1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/EEF1A1.dataset already exists EEF1A1 adata already tokenized.


 83%|████████▎ | 1744/2106 [1:09:10<08:07,  1.35s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ZNF706.dataset already exists ZNF706 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ATP5F1C.dataset already exists ATP5F1C adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.59 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.40 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.24 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 23.01 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 22.19 examples/s][A
 83%|████████▎ | 1744/2106 [1:09:22<08:07,  1.35s/it]:14, 21.85 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.99 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 24.46 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.52 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.65 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.15 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.76 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SFPQ.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/C5orf30.dataset already exists C5orf30 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPP14.dataset already exists RPP14 adata already tokenized.


 83%|████████▎ | 1750/2106 [1:09:36<11:28,  1.94s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SF3A3.dataset already exists SF3A3 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DDX19A.dataset already exists DDX19A adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PSMG2.dataset already exists PSMG2 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.83 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.62 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.25 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.53 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.85 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.51 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 29.12 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:09, 26.17 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 24.25 examples/s][A
 83%|████████▎ | 1750/2106 [1:09:52<11:28,  1.94s/it]:08, 22.90 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.27 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 21.77 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SRSF6.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/COPG1.dataset already exists COPG1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SF3B3.dataset already exists SF3B3 adata already tokenized.


 83%|████████▎ | 1755/2106 [1:10:01<16:06,  2.75s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SMC5.dataset already exists SMC5 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/GEMIN5.dataset already exists GEMIN5 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.60 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.39 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.08 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.44 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.68 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.48 examples/s][A
 83%|████████▎ | 1755/2106 [1:10:12<16:06,  2.75s/it]:09, 29.03 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.37 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.57 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.73 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.91 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.88 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NPM3.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/VHL.dataset already exists VHL adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/LRR1.dataset already exists LRR1 adata already tokenized.


 84%|████████▎ | 1760/2106 [1:10:22<15:28,  2.68s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SNAPC3.dataset already exists SNAPC3 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/BTF3L4.dataset already exists BTF3L4 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PNO1.dataset already exists PNO1 adata already tokenized.


 84%|████████▍ | 1764/2106 [1:10:22<07:17,  1.28s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ECT2.dataset already exists ECT2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/POLR3A.dataset already exists POLR3A adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PSMG3.dataset already exists PSMG3 adata already tokenized.


 84%|████████▍ | 1766/2106 [1:10:22<05:07,  1.11it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SRP54.dataset already exists SRP54 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SCFD1.dataset already exists SCFD1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MRPS5.dataset already exists MRPS5 adata already tokenized.


 84%|████████▍ | 1770/2106 [1:10:23<02:40,  2.10it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/BRD2.dataset already exists BRD2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ABHD17A.dataset already exists ABHD17A adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TGS1.dataset already exists TGS1 adata already tokenized.


 84%|████████▍ | 1772/2106 [1:10:23<01:58,  2.81it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/IK.dataset already exists IK adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RINT1.dataset already exists RINT1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/OSTC.dataset already exists OSTC adata already tokenized.


 84%|████████▍ | 1774/2106 [1:10:23<01:30,  3.67it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/POP5.dataset already exists POP5 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.72 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.31 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.67 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.59 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.48 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.04 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.43 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.41 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.83 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.31 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.58 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.83 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/POLD1.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/LEO1.dataset already exists LEO1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/STAG2.dataset already exists STAG2 adata already tokenized.


 85%|████████▍ | 1780/2106 [1:10:45<07:31,  1.39s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPS25.dataset already exists RPS25 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DDX59.dataset already exists DDX59 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RNGTT.dataset already exists RNGTT adata already tokenized.


 85%|████████▍ | 1782/2106 [1:10:45<05:10,  1.04it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/USF2.dataset already exists USF2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/EXOSC5.dataset already exists EXOSC5 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TCERG1.dataset already exists TCERG1 adata already tokenized.


 85%|████████▍ | 1786/2106 [1:10:45<02:37,  2.03it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/EIF2B1.dataset already exists EIF2B1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/EIF5A.dataset already exists EIF5A adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CDK6.dataset already exists CDK6 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.76 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.44 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.17 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.49 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.71 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.48 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 29.01 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.44 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.61 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.75 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 30.00 examples/s][A
 85%|████████▍ | 1786/2106 [1:11:02<02:37,  2.03it/s]:04, 29.99 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MANF.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.97 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.33 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.87 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.22 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.16 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.01 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.16 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.22 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.06 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 26.56 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.57 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.23 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/EBNA1BP2.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CDK9.dataset already exists CDK9 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPF2.dataset already exists RPF2 adata already tokenized.


 85%|████████▌ | 1792/2106 [1:11:30<17:40,  3.38s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/EGLN2.dataset already exists EGLN2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/C17orf58.dataset already exists C17orf58 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SDC1.dataset already exists SDC1 adata already tokenized.


 85%|████████▌ | 1796/2106 [1:11:30<08:08,  1.58s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPS10.dataset already exists RPS10 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ABT1.dataset already exists ABT1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CHMP7.dataset already exists CHMP7 adata already tokenized.


 85%|████████▌ | 1798/2106 [1:11:30<05:40,  1.11s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NAPG.dataset already exists NAPG adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PSAT1.dataset already exists PSAT1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CDC37.dataset already exists CDC37 adata already tokenized.


 86%|████████▌ | 1802/2106 [1:11:30<02:53,  1.75it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MIPEP.dataset already exists MIPEP adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPL34.dataset already exists RPL34 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RBBP6.dataset already exists RBBP6 adata already tokenized.


 86%|████████▌ | 1804/2106 [1:11:31<02:07,  2.37it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPL36A.dataset already exists RPL36A adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MED22.dataset already exists MED22 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SART3.dataset already exists SART3 adata already tokenized.


 86%|████████▌ | 1808/2106 [1:11:31<01:13,  4.07it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/FASTKD5.dataset already exists FASTKD5 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ATP11B.dataset already exists ATP11B adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/BIRC5.dataset already exists BIRC5 adata already tokenized.


 86%|████████▌ | 1810/2106 [1:11:31<00:57,  5.12it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ASF1B.dataset already exists ASF1B adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DDX55.dataset already exists DDX55 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/FBXO42.dataset already exists FBXO42 adata already tokenized.


 86%|████████▌ | 1814/2106 [1:11:31<00:39,  7.45it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RNPS1.dataset already exists RNPS1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PRPF18.dataset already exists PRPF18 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/HNRNPU.dataset already exists HNRNPU adata already tokenized.


 86%|████████▌ | 1816/2106 [1:11:32<00:34,  8.52it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TBCB.dataset already exists TBCB adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/INTS8.dataset already exists INTS8 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/STARD7.dataset already exists STARD7 adata already tokenized.


 86%|████████▋ | 1820/2106 [1:11:32<00:27, 10.22it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MDN1.dataset already exists MDN1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SRP72.dataset already exists SRP72 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/EIF3H.dataset already exists EIF3H adata already tokenized.


 87%|████████▋ | 1822/2106 [1:11:32<00:26, 10.79it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TBC1D1.dataset already exists TBC1D1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/COPS8.dataset already exists COPS8 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CHMP5.dataset already exists CHMP5 adata already tokenized.


 87%|████████▋ | 1826/2106 [1:11:32<00:23, 11.95it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/XRCC6.dataset already exists XRCC6 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RTF1.dataset already exists RTF1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DUT.dataset already exists DUT adata already tokenized.


 87%|████████▋ | 1828/2106 [1:11:33<00:23, 12.02it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NAA25.dataset already exists NAA25 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/EDC4.dataset already exists EDC4 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/HLA-C.dataset already exists HLA-C adata already tokenized.


 87%|████████▋ | 1832/2106 [1:11:33<00:22, 12.30it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CWF19L2.dataset already exists CWF19L2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/BTF3.dataset already exists BTF3 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/FAM136A.dataset already exists FAM136A adata already tokenized.


 87%|████████▋ | 1834/2106 [1:11:33<00:21, 12.44it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RHOQ.dataset already exists RHOQ adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/INTS11.dataset already exists INTS11 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/HUS1.dataset already exists HUS1 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.00 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.15 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 19.71 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.88 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.42 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.29 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.98 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.06 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.33 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 22.01 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.96 examples/s][A
 87%|████████▋ | 1834/2106 [1:11:53<00:21, 12.44it/s]:04, 25.56 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ATP1A1.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/URM1.dataset already exists URM1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PAXBP1.dataset already exists PAXBP1 adata already tokenized.


 87%|████████▋ | 1840/2106 [1:11:58<08:29,  1.91s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/EFTUD2.dataset already exists EFTUD2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/UTP6.dataset already exists UTP6 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MYSM1.dataset already exists MYSM1 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.77 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.08 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.70 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.18 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.11 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.58 examples/s][A
 87%|████████▋ | 1840/2106 [1:12:13<08:29,  1.91s/it]:12, 22.59 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.13 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 21.78 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.49 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 20.65 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 22.86 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NDUFA6.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SMAGP.dataset already exists SMAGP adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/GABPB1.dataset already exists GABPB1 adata already tokenized.


 88%|████████▊ | 1846/2106 [1:12:23<10:44,  2.48s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TAF5.dataset already exists TAF5 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NOL12.dataset already exists NOL12 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NELFB.dataset already exists NELFB adata already tokenized.


 88%|████████▊ | 1850/2106 [1:12:23<05:21,  1.25s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SNRPG.dataset already exists SNRPG adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/GRSF1.dataset already exists GRSF1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MVD.dataset already exists MVD adata already tokenized.


 88%|████████▊ | 1852/2106 [1:12:23<03:48,  1.11it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SASS6.dataset already exists SASS6 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/HAUS6.dataset already exists HAUS6 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/GTF2F1.dataset already exists GTF2F1 adata already tokenized.


 88%|████████▊ | 1856/2106 [1:12:24<02:00,  2.08it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RFC2.dataset already exists RFC2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/INTS14.dataset already exists INTS14 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/POLE2.dataset already exists POLE2 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.26 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.12 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.21 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 29.67 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 29.75 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:11, 25.87 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:11, 23.77 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:10, 22.81 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 21.98 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 21.44 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:07, 21.24 examples/s][A
 88%|████████▊ | 1856/2106 [1:12:43<02:00,  2.08it/s]:05, 21.13 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SP1.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPP38.dataset already exists RPP38 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPAIN.dataset already exists RPAIN adata already tokenized.


 88%|████████▊ | 1862/2106 [1:12:49<06:43,  1.65s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/FNBP4.dataset already exists FNBP4 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TBP.dataset already exists TBP adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/LARS.dataset already exists LARS adata already tokenized.


 89%|████████▊ | 1864/2106 [1:12:49<04:34,  1.13s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NFYB.dataset already exists NFYB adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/N6AMT1.dataset already exists N6AMT1 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 16.79 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.14 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 19.95 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.09 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.48 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.08 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.41 examples/s][A
 89%|████████▊ | 1864/2106 [1:13:03<04:34,  1.13s/it]:09, 27.43 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.97 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.32 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.87 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.71 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ATL2.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/GFM1.dataset already exists GFM1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SF3A2.dataset already exists SF3A2 adata already tokenized.


 89%|████████▊ | 1869/2106 [1:13:14<09:38,  2.44s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SACM1L.dataset already exists SACM1L adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/KIN.dataset already exists KIN adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/KRT8.dataset already exists KRT8 adata already tokenized.


 89%|████████▉ | 1873/2106 [1:13:14<04:33,  1.17s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RFC5.dataset already exists RFC5 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ATP6V1B2.dataset already exists ATP6V1B2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TIMELESS.dataset already exists TIMELESS adata already tokenized.


 89%|████████▉ | 1875/2106 [1:13:14<03:12,  1.20it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CSTF3.dataset already exists CSTF3 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/COPS6.dataset already exists COPS6 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/EXOSC4.dataset already exists EXOSC4 adata already tokenized.


 89%|████████▉ | 1877/2106 [1:13:15<02:17,  1.66it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/EIF1AD.dataset already exists EIF1AD adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.19 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.60 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.17 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.27 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.14 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.43 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.39 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 24.08 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.05 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.33 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:07, 21.97 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.05 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TMSB10.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SMC6.dataset already exists SMC6 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/BNIP1.dataset already exists BNIP1 adata already tokenized.


 91%|█████████ | 1917/2106 [1:15:07<13:09,  4.18s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPF1.dataset already exists RPF1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TRAPPC11.dataset already exists TRAPPC11 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TRMT10C.dataset already exists TRMT10C adata already tokenized.


 91%|█████████ | 1921/2106 [1:15:08<06:04,  1.97s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/BMP2K.dataset already exists BMP2K adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SSRP1.dataset already exists SSRP1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/LRPPRC.dataset already exists LRPPRC adata already tokenized.


 91%|█████████▏| 1923/2106 [1:15:08<04:12,  1.38s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CD3EAP.dataset already exists CD3EAP adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SPG7.dataset already exists SPG7 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/AP2M1.dataset already exists AP2M1 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.50 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.10 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.68 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.47 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.25 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.47 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.19 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.80 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.21 examples/s][A
 91%|█████████▏| 1923/2106 [1:15:23<04:12,  1.38s/it]:06, 29.52 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 26.41 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:05, 24.57 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SEC61B.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/GTF2A1.dataset already exists GTF2A1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPS6.dataset already exists RPS6 adata already tokenized.


 92%|█████████▏| 1929/2106 [1:15:31<06:33,  2.22s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/HMGCS1.dataset already exists HMGCS1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/IARS.dataset already exists IARS adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RANGAP1.dataset already exists RANGAP1 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.54 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.09 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.81 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.06 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.87 examples/s][A
 92%|█████████▏| 1929/2106 [1:15:43<06:33,  2.22s/it]:13, 23.27 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 22.60 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.07 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 21.72 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.47 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 21.20 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.05 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/POLA2.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MRPS21.dataset already exists MRPS21 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PGK1.dataset already exists PGK1 adata already tokenized.


 92%|█████████▏| 1935/2106 [1:15:58<07:50,  2.75s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TBPL1.dataset already exists TBPL1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ELP3.dataset already exists ELP3 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ALYREF.dataset already exists ALYREF adata already tokenized.


 92%|█████████▏| 1939/2106 [1:15:58<03:51,  1.39s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/BUD13.dataset already exists BUD13 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DNMT1.dataset already exists DNMT1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/IFITM2.dataset already exists IFITM2 adata already tokenized.


 92%|█████████▏| 1941/2106 [1:15:59<02:44,  1.01it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/BUD23.dataset already exists BUD23 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/THOC6.dataset already exists THOC6 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DYNLL1.dataset already exists DYNLL1 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.11 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.25 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.95 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:16, 21.99 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:13, 24.40 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 26.07 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.35 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.26 examples/s][A
 92%|█████████▏| 1941/2106 [1:16:13<02:44,  1.01it/s]:07, 28.83 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.52 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 25.74 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 24.11 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PSMB3.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SNRNP25.dataset already exists SNRNP25 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PIGH.dataset already exists PIGH adata already tokenized.


 92%|█████████▏| 1947/2106 [1:16:22<05:31,  2.09s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/UBE2I.dataset already exists UBE2I adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPL23A.dataset already exists RPL23A adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CHMP6.dataset already exists CHMP6 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.94 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 21.20 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.87 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.98 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 28.11 examples/s][A
 92%|█████████▏| 1947/2106 [1:16:33<05:31,  2.09s/it]:10, 28.75 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 25.77 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.98 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 23.07 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.21 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:07, 21.79 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.49 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CLPB.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.98 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.16 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.90 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 20.20 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.53 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.97 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.03 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.54 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 23.58 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 25.36 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:05, 26.73 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 25.06 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PPIH.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RNF4.dataset already exists RNF4 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TFIP11.dataset already exists TFIP11 adata already tokenized.


 93%|█████████▎| 1954/2106 [1:17:13<10:04,  3.98s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MRPS9.dataset already exists MRPS9 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SAMM50.dataset already exists SAMM50 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SRCAP.dataset already exists SRCAP adata already tokenized.


 93%|█████████▎| 1958/2106 [1:17:13<04:43,  1.92s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/USP10.dataset already exists USP10 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NDC80.dataset already exists NDC80 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NMT1.dataset already exists NMT1 adata already tokenized.


 93%|█████████▎| 1960/2106 [1:17:13<03:17,  1.35s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/POGLUT3.dataset already exists POGLUT3 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PSMC6.dataset already exists PSMC6 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TUBB.dataset already exists TUBB adata already tokenized.


 93%|█████████▎| 1964/2106 [1:17:13<01:38,  1.44it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ZBTB11.dataset already exists ZBTB11 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PCID2.dataset already exists PCID2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SMG9.dataset already exists SMG9 adata already tokenized.


 93%|█████████▎| 1966/2106 [1:17:14<01:11,  1.96it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/EXOC5.dataset already exists EXOC5 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/BCL2L1.dataset already exists BCL2L1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SMG1.dataset already exists SMG1 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.70 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.14 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.69 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.00 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.89 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.90 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.91 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.30 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.13 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:15<00:08, 21.02 examples/s][A
 93%|█████████▎| 1966/2106 [1:17:33<01:11,  1.96it/s]:07, 20.17 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:18<00:06, 20.39 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/KANSL3.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ELAC2.dataset already exists ELAC2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CENPA.dataset already exists CENPA adata already tokenized.


 94%|█████████▎| 1972/2106 [1:17:41<04:56,  2.21s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PPP4C.dataset already exists PPP4C adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/GLRX3.dataset already exists GLRX3 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ING3.dataset already exists ING3 adata already tokenized.


 94%|█████████▍| 1976/2106 [1:17:41<02:26,  1.12s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/WDR18.dataset already exists WDR18 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/LPIN1.dataset already exists LPIN1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RAD51.dataset already exists RAD51 adata already tokenized.


 94%|█████████▍| 1978/2106 [1:17:41<01:43,  1.23it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SLC35A4.dataset already exists SLC35A4 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/YBX3.dataset already exists YBX3 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MBD3.dataset already exists MBD3 adata already tokenized.


 94%|█████████▍| 1982/2106 [1:17:42<00:54,  2.27it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/XRCC5.dataset already exists XRCC5 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/UTP20.dataset already exists UTP20 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/XPO5.dataset already exists XPO5 adata already tokenized.


 94%|█████████▍| 1984/2106 [1:17:42<00:40,  3.00it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RNASEH2C.dataset already exists RNASEH2C adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SDHAF2.dataset already exists SDHAF2 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.61 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.02 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.75 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.06 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.19 examples/s][A
 94%|█████████▍| 1984/2106 [1:17:53<00:40,  3.00it/s]:13, 23.42 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 22.45 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.91 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.54 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.24 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 21.09 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.09 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TPP2.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ISCA2.dataset already exists ISCA2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MED17.dataset already exists MED17 adata already tokenized.


 94%|█████████▍| 1990/2106 [1:18:07<03:07,  1.62s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/THAP11.dataset already exists THAP11 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DGCR8.dataset already exists DGCR8 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/HGS.dataset already exists HGS adata already tokenized.


 95%|█████████▍| 1992/2106 [1:18:08<02:07,  1.11s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TAF1.dataset already exists TAF1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/HAUS8.dataset already exists HAUS8 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/POLR2L.dataset already exists POLR2L adata already tokenized.


 95%|█████████▍| 1996/2106 [1:18:08<01:02,  1.77it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/F8A1.dataset already exists F8A1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/BDP1.dataset already exists BDP1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DCTN2.dataset already exists DCTN2 adata already tokenized.


 95%|█████████▍| 1998/2106 [1:18:08<00:44,  2.42it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SAP130.dataset already exists SAP130 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/VBP1.dataset already exists VBP1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ANKRD11.dataset already exists ANKRD11 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 16.98 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.46 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 23.35 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.98 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.20 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 24.97 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.51 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 22.56 examples/s][A
 95%|█████████▍| 1998/2106 [1:18:23<00:44,  2.42it/s]:09, 21.77 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 21.48 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.28 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.22 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NHP2.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/UBE2Z.dataset already exists UBE2Z adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.97 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.40 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.97 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.17 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.00 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.06 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.93 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 20.79 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.71 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 22.59 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:06, 22.18 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.71 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/FTSJ3.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RRP12.dataset already exists RRP12 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CHCHD4.dataset already exists CHCHD4 adata already tokenized.


 95%|█████████▌| 2006/2106 [1:19:01<05:42,  3.43s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/GUCD1.dataset already exists GUCD1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPL35.dataset already exists RPL35 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/HIRA.dataset already exists HIRA adata already tokenized.


 95%|█████████▌| 2010/2106 [1:19:01<02:44,  1.71s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/POLR3E.dataset already exists POLR3E adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/LSM5.dataset already exists LSM5 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PITRM1.dataset already exists PITRM1 adata already tokenized.


 96%|█████████▌| 2012/2106 [1:19:01<01:54,  1.22s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MFAP1.dataset already exists MFAP1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DCTN6.dataset already exists DCTN6 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TAF3.dataset already exists TAF3 adata already tokenized.


 96%|█████████▌| 2014/2106 [1:19:01<01:20,  1.14it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CBLL1.dataset already exists CBLL1 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.85 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.35 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.92 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.26 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.38 examples/s][A
 96%|█████████▌| 2014/2106 [1:19:13<01:20,  1.14it/s]:13, 23.72 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 23.06 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.43 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.19 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.51 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.30 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.15 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/GEMIN6.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SPEN.dataset already exists SPEN adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NUP85.dataset already exists NUP85 adata already tokenized.


 96%|█████████▌| 2019/2106 [1:19:27<03:30,  2.42s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PTPN1.dataset already exists PTPN1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ATP5ME.dataset already exists ATP5ME adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/KAT7.dataset already exists KAT7 adata already tokenized.


 96%|█████████▌| 2023/2106 [1:19:28<01:38,  1.19s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SEM1.dataset already exists SEM1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SCAF1.dataset already exists SCAF1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PI4KA.dataset already exists PI4KA adata already tokenized.


 96%|█████████▌| 2025/2106 [1:19:28<01:08,  1.18it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/POLR2G.dataset already exists POLR2G adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPL19.dataset already exists RPL19 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ZCRB1.dataset already exists ZCRB1 adata already tokenized.


 96%|█████████▋| 2029/2106 [1:19:28<00:34,  2.21it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/UQCC2.dataset already exists UQCC2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PET117.dataset already exists PET117 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/AKIRIN2.dataset already exists AKIRIN2 adata already tokenized.


 96%|█████████▋| 2031/2106 [1:19:28<00:25,  2.94it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CRNKL1.dataset already exists CRNKL1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/POT1.dataset already exists POT1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/LENG8.dataset already exists LENG8 adata already tokenized.


 97%|█████████▋| 2035/2106 [1:19:29<00:14,  4.86it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PEF1.dataset already exists PEF1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PABPN1.dataset already exists PABPN1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPL10.dataset already exists RPL10 adata already tokenized.


 97%|█████████▋| 2037/2106 [1:19:29<00:11,  5.95it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CDT1.dataset already exists CDT1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ACTR8.dataset already exists ACTR8 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RSL1D1.dataset already exists RSL1D1 adata already tokenized.


 97%|█████████▋| 2041/2106 [1:19:29<00:08,  8.02it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/GINS1.dataset already exists GINS1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPL13.dataset already exists RPL13 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SETX.dataset already exists SETX adata already tokenized.


 97%|█████████▋| 2043/2106 [1:19:29<00:07,  8.97it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/AFG3L2.dataset already exists AFG3L2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/FBL.dataset already exists FBL adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/BUB1B.dataset already exists BUB1B adata already tokenized.


 97%|█████████▋| 2047/2106 [1:19:30<00:05, 10.44it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RSRC2.dataset already exists RSRC2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MRPS17.dataset already exists MRPS17 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SNRPA1.dataset already exists SNRPA1 adata already tokenized.


 97%|█████████▋| 2049/2106 [1:19:30<00:05, 10.79it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/GLE1.dataset already exists GLE1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/UROD.dataset already exists UROD adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/FBRSL1.dataset already exists FBRSL1 adata already tokenized.


 97%|█████████▋| 2053/2106 [1:19:30<00:04, 11.22it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MRPL3.dataset already exists MRPL3 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TMEM242.dataset already exists TMEM242 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PTEN.dataset already exists PTEN adata already tokenized.


 98%|█████████▊| 2055/2106 [1:19:30<00:04, 11.56it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/AAMP.dataset already exists AAMP adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CDK12.dataset already exists CDK12 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DDX46.dataset already exists DDX46 adata already tokenized.


 98%|█████████▊| 2059/2106 [1:19:31<00:03, 11.91it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/THUMPD1.dataset already exists THUMPD1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NCAPG2.dataset already exists NCAPG2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/HCFC1.dataset already exists HCFC1 adata already tokenized.


 98%|█████████▊| 2061/2106 [1:19:31<00:03, 12.00it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SF3B2.dataset already exists SF3B2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CDC45.dataset already exists CDC45 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/AARS2.dataset already exists AARS2 adata already tokenized.


 98%|█████████▊| 2065/2106 [1:19:31<00:03, 12.30it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/BRD8.dataset already exists BRD8 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PDCD2.dataset already exists PDCD2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ZRSR2.dataset already exists ZRSR2 adata already tokenized.


 98%|█████████▊| 2067/2106 [1:19:31<00:03, 12.23it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ATXN10.dataset already exists ATXN10 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MED24.dataset already exists MED24 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MRPL4.dataset already exists MRPL4 adata already tokenized.


 98%|█████████▊| 2071/2106 [1:19:32<00:02, 12.39it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ELP4.dataset already exists ELP4 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/STXBP4.dataset already exists STXBP4 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/LIN54.dataset already exists LIN54 adata already tokenized.


 98%|█████████▊| 2073/2106 [1:19:32<00:02, 12.23it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DTYMK.dataset already exists DTYMK adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PCF11.dataset already exists PCF11 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NOL6.dataset already exists NOL6 adata already tokenized.


 99%|█████████▊| 2077/2106 [1:19:32<00:02, 12.15it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TPI1.dataset already exists TPI1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NEDD1.dataset already exists NEDD1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/FBXW7.dataset already exists FBXW7 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DHX9.dataset already exists DHX9 adata already tokenized.
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.90 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.06 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.58 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.05 examples/s][A
 99%|█████████▊| 2077/2106 [1:19:43<00:02, 12.15it/s]:15, 21.39 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.33 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.23 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.26 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 23.45 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 25.18 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:05, 26.51 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 27.36 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NELFA.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PSMC5.dataset already exists PSMC5 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CPNE7.dataset already exists CPNE7 adata already tokenized.


 99%|█████████▉| 2083/2106 [1:19:56<00:40,  1.77s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/USP36.dataset already exists USP36 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TUBA1B.dataset already exists TUBA1B adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MRPS30.dataset already exists MRPS30 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.95 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.33 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.93 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.95 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.01 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.06 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.91 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.20 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.03 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 26.34 examples/s][A
 99%|█████████▉| 2083/2106 [1:20:13<00:40,  1.77s/it]:05, 27.61 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.27 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CDC6.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/TRMT5.dataset already exists TRMT5 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/HSP90B1.dataset already exists HSP90B1 adata already tokenized.


 99%|█████████▉| 2090/2106 [1:20:20<00:29,  1.87s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ELP5.dataset already exists ELP5 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NKAP.dataset already exists NKAP adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DNAJC19.dataset already exists DNAJC19 adata already tokenized.


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.59 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 19.85 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.68 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.88 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.35 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.72 examples/s][A
 99%|█████████▉| 2090/2106 [1:20:34<00:29,  1.87s/it]:10, 25.40 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.61 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.75 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.40 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.97 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 25.87 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/NCAPG.dataset
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/STX18.dataset already exists STX18 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/WDR1.dataset already exists WDR1 adata already tokenized.


 99%|█████████▉| 2095/2106 [1:20:44<00:28,  2.61s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PSMD1.dataset already exists PSMD1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/FAM32A.dataset already exists FAM32A adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/HSD17B10.dataset already exists HSD17B10 adata already tokenized.


100%|█████████▉| 2099/2106 [1:20:44<00:08,  1.26s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ATP6V1C1.dataset already exists ATP6V1C1 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/RPS11.dataset already exists RPS11 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/ZNF718.dataset already exists ZNF718 adata already tokenized.


100%|█████████▉| 2101/2106 [1:20:45<00:04,  1.12it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/DHX29.dataset already exists DHX29 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SHOC2.dataset already exists SHOC2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/CWC22.dataset already exists CWC22 adata already tokenized.


100%|█████████▉| 2105/2106 [1:20:45<00:00,  2.13it/s]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/MRPL21.dataset already exists MRPL21 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/SRRM2.dataset already exists SRRM2 adata already tokenized.
/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/PTPMT1.dataset already exists PTPMT1 adata already tokenized.


100%|██████████| 2106/2106 [1:20:45<00:00,  2.30s/it]

/data/scratch/bty416/scFMs/data/tokenized_data/rpe1_pert/EIF3B.dataset already exists EIF3B adata already tokenized.





#### Generate embeddings

In [34]:
def embed_pert_data(all_perts, all_perts_embex, non_exp_genes, token_dir, output_dir, 
                    emb_mode: str = 'cell', batch_size: int = 64, dataset: str = 'rpe1',
                    model_dir: str = "../bin/Geneformer", save_dir=None):
    """Collect a Geneformer embedding for every perturbation and pickle the result.

    For each perturbation in ``all_perts`` the first matching rule applies:

    1. it is in ``non_exp_genes``                   -> skipped silently;
    2. it is already a key of ``all_perts_embex``   -> left untouched;
    3. ``{output_dir}/{pert}_pert_{emb_mode}.csv`` exists
                                                    -> read with pandas and stored;
    4. otherwise embeddings are extracted from ``{token_dir}/{pert}.dataset``
       with ``EmbExtractor.extract_embs`` and stored. A missing file is
       reported with a message and the perturbation is skipped.

    Parameters
    ----------
    all_perts : iterable
        Perturbation names to process.
    all_perts_embex : dict
        Mapping perturbation -> embedding. It is updated IN PLACE and is also
        the object that gets returned and pickled.
    non_exp_genes : container
        Perturbations to skip (anything supporting ``in``).
    token_dir : str
        Directory holding one ``{pert}.dataset`` per perturbation.
    output_dir : str
        Directory used both as the CSV cache that is looked up and as the
        output directory handed to ``extract_embs``.
    emb_mode : str
        Passed to ``EmbExtractor`` and used in file names.
    batch_size : int
        Passed to ``EmbExtractor`` as ``forward_batch_size``.
    dataset : str
        Prefix of the pickle file name.
    model_dir : str
        Model directory handed to ``extract_embs``.
    save_dir : str, optional
        Directory for the gzipped pickle; defaults to ``{d_path}/embeddings``
        (``d_path`` is the notebook-level data root).

    Returns
    -------
    dict
        ``all_perts_embex`` after the update.
    """
    # The extractor's settings do not depend on the perturbation, so it is
    # built once, on first need, instead of once per loop iteration.
    embex = None

    for pert in all_perts:
        emb_prefix = f"{pert}_pert_{emb_mode}"
        cached_csv = Path(f"{output_dir}/{emb_prefix}.csv")

        if pert in non_exp_genes:
            continue

        if pert in all_perts_embex:
            print(f'Embedding for {pert} already added to dict')
            continue

        if cached_csv.is_file():
            print(f'Embedding for {pert} already generated. Adding to dict ...')
            all_perts_embex[pert] = pd.read_csv(cached_csv, index_col=0)
            continue

        try:
            if embex is None:
                # make sure the extractor has somewhere to write its output
                Path(output_dir).mkdir(parents=True, exist_ok=True)
                # initiate EmbExtractor
                embex = EmbExtractor(model_type="Pretrained",
                                     num_classes=0,
                                     emb_mode=emb_mode,
                                     emb_layer=0,
                                     max_ncells=20000,
                                     forward_batch_size=batch_size,
                                     nproc=16)

            # extracts embedding from input data
            # input data is tokenized rank value encodings generated by Geneformer tokenizer (see tokenizing_scRNAseq_data.ipynb)
            # example dataset: https://huggingface.co/datasets/ctheodoris/Genecorpus-30M/tree/main/example_input_files/cell_classification/disease_classification/human_dcm_hcm_nf.dataset
            embs = embex.extract_embs(model_dir,
                                      f'{token_dir}/{pert}.dataset',
                                      output_dir,
                                      emb_prefix)
        except FileNotFoundError:
            # best effort: report the missing input and carry on with the next perturbation
            print(f'{pert} file not found')
        else:
            all_perts_embex[pert] = embs

    if save_dir is None:
        save_dir = f"{d_path}/embeddings"
    # gzip.open does not create missing parent directories
    Path(save_dir).mkdir(parents=True, exist_ok=True)
    out_file = f"{save_dir}/{dataset}_perts_{emb_mode}.pkl.gz"

    with gzip.open(out_file, 'wb') as f:
        pkl.dump(all_perts_embex, f)

    print(f'all_perts_embex saved to {out_file}')

    return all_perts_embex

In [38]:
# Keep embeddings already held in memory when this cell is re-run; on a fresh
# kernel start from an empty dict (embed_pert_data refills it from the cached
# CSV files in rpe1_out_dir or by extracting the missing ones).
try:
    rpe1_all_perts_embex
except NameError:
    rpe1_all_perts_embex = {}

# Mask columns that sum to zero; embed_pert_data skips these perturbations
rpe1_non_exp_gene = rpe1_mask.columns[rpe1_mask.sum() == 0]

# Both locations hang off the shared data root defined in the paths cell
rpe1_tokenized_dir = f'{d_path}/tokenized_data/rpe1_pert'
rpe1_out_dir = f"{d_path}/embeddings/perts/rpe1"

rpe1_all_perts_embex = embed_pert_data(rpe1_perts, rpe1_all_perts_embex, rpe1_non_exp_gene, 
                                       rpe1_tokenized_dir, rpe1_out_dir,dataset='rpe1')

Embedding for RPS27A already generated. Adding to dict ...
Embedding for UTP15 already generated. Adding to dict ...
Embedding for NEDD1 already generated. Adding to dict ...
Embedding for PFDN5 already generated. Adding to dict ...
Embedding for TRMT10C already generated. Adding to dict ...
Embedding for PSMB1 already generated. Adding to dict ...
Embedding for PSMC6 already generated. Adding to dict ...
Embedding for RPS9 already generated. Adding to dict ...
Embedding for CHMP2A already generated. Adding to dict ...
Embedding for BOP1 already generated. Adding to dict ...
Embedding for ERAL1 already generated. Adding to dict ...
Embedding for ACIN1 already generated. Adding to dict ...
Embedding for INO80E already generated. Adding to dict ...
Embedding for CEP97 already generated. Adding to dict ...
Embedding for C7orf26 already generated. Adding to dict ...
Embedding for SYF2 already generated. Adding to dict ...
Embedding for SPCS2 already generated. Adding to dict ...
Embedding 

In [39]:
# Number of perturbations that currently have an entry in the embeddings dict
len(rpe1_all_perts_embex)

2105

In [40]:
# Summarise the embedding shapes as "shape -> number of perturbations" instead
# of printing one line per perturbation, which floods the cell output.
pd.Series([emb.shape for emb in rpe1_all_perts_embex.values()]).value_counts()

(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)

#### K562

In [41]:
## Read the pickled K562 perturbation collection and report how many entries it has
with Path(f'{raw_expr_path}/replogle_k562/all_perts.pkl').open('rb') as perts_handle:
    k562_all_perts = pkl.load(perts_handle)

print(len(k562_all_perts))

## Read the pickled K562 mask

with Path(f'{raw_expr_path}/replogle_k562_mask_df.pkl').open('rb') as mask_handle:
    k562_mask = pkl.load(mask_handle)
   

1866


In [42]:
## Keep only the perturbations that also occur in k562_ctrl_adata.var_names
k562_perts = set(k562_all_perts) & set(k562_ctrl_adata.var_names)
len(k562_perts)

1866

In [43]:
## Non-expressed genes: mask columns whose entries sum to zero
k562_non_exp_gene = k562_mask.columns[k562_mask.sum().eq(0)]

In [46]:
# Derive the tokenizer-input folder from the shared `raw_expr_path` root
# instead of repeating the absolute path in this cell.
k562_pert_path = f'{raw_expr_path}/tokenizer_input/perts/replogle_k562'

prep_pert_data(k562_perts, k562_mask, k562_ctrl_adata, k562_pert_path)

 12%|█▏        | 231/1866 [00:00<00:01, 1160.25it/s]

RPS27A adata already prepared.
UTP15 adata already prepared.
NEDD1 adata already prepared.
PFDN5 adata already prepared.
TRMT10C adata already prepared.
PSMB1 adata already prepared.
PSMC6 adata already prepared.
RPS9 adata already prepared.
CHMP2A adata already prepared.
ERAL1 adata already prepared.
BOP1 adata already prepared.
ACIN1 adata already prepared.
CEP97 adata already prepared.
C7orf26 adata already prepared.
SYF2 adata already prepared.
RNF123 adata already prepared.
SPCS2 adata already prepared.
H2AFX adata already prepared.
URB1 adata already prepared.
EIF3D adata already prepared.
SRCAP adata already prepared.
COPB1 adata already prepared.
TFAM adata already prepared.
GRSF1 adata already prepared.
MAX adata already prepared.
DSN1 adata already prepared.
UTP6 adata already prepared.
RNF4 adata already prepared.
HNRNPH1 adata already prepared.
TUBG1 adata already prepared.
VEZT adata already prepared.
MCM4 adata already prepared.
NSMCE2 adata already prepared.
BRD4 adata a

 25%|██▌       | 474/1866 [00:00<00:01, 1192.11it/s]

PPME1 adata already prepared.
MRPL43 adata already prepared.
SRRM1 adata already prepared.
PGPEP1 adata already prepared.
TSEN34 adata already prepared.
RPL34 adata already prepared.
RAB6A adata already prepared.
CEP68 adata already prepared.
INTS6 adata already prepared.
TACC3 adata already prepared.
SMNDC1 adata already prepared.
VCP adata already prepared.
PABPC1 adata already prepared.
COASY adata already prepared.
RNPS1 adata already prepared.
TAMM41 adata already prepared.
C17orf58 adata already prepared.
VPS33A adata already prepared.
CCNH adata already prepared.
RAD17 adata already prepared.
GTF2E2 adata already prepared.
AP2M1 adata already prepared.
TRMT5 adata already prepared.
DOHH adata already prepared.
PSMB3 adata already prepared.
CDC6 adata already prepared.
MRPS10 adata already prepared.
TBCB adata already prepared.
ZNF100 adata already prepared.
POT1 adata already prepared.
CDCA5 adata already prepared.
GTF2H2C adata already prepared.
BRK1 adata already prepared.
TXN

 39%|███▊      | 719/1866 [00:00<00:00, 1202.32it/s]

RBM8A adata already prepared.
SNRPD2 adata already prepared.
POLR2H adata already prepared.
SCFD1 adata already prepared.
TINF2 adata already prepared.
TMX2 adata already prepared.
DHX37 adata already prepared.
DYNC1H1 adata already prepared.
NDC80 adata already prepared.
MRPL32 adata already prepared.
SOD2 adata already prepared.
DDX5 adata already prepared.
ABCB7 adata already prepared.
DRG1 adata already prepared.
GTF3C4 adata already prepared.
POLR2A adata already prepared.
CDC40 adata already prepared.
C9orf16 adata already prepared.
MRPS2 adata already prepared.
TSPYL5 adata already prepared.
ZW10 adata already prepared.
VPS25 adata already prepared.
RPN1 adata already prepared.
ZNF131 adata already prepared.
OIP5 adata already prepared.
WBP1L adata already prepared.
TELO2 adata already prepared.
PSMA7 adata already prepared.
EIF2B5 adata already prepared.
PRMT5 adata already prepared.
PRPF40A adata already prepared.
CLTC adata already prepared.
RPA3 adata already prepared.
GTF3C

 52%|█████▏    | 970/1866 [00:00<00:00, 1231.70it/s]

PCBP1 adata already prepared.
TUBGCP2 adata already prepared.
TERF2 adata already prepared.
SRP14 adata already prepared.
EXOSC10 adata already prepared.
SUPT4H1 adata already prepared.
NDUFS3 adata already prepared.
MRPL54 adata already prepared.
RPS28 adata already prepared.
GATA1 adata already prepared.
RPL13 adata already prepared.
RPAP1 adata already prepared.
NOP2 adata already prepared.
MIS12 adata already prepared.
CRCP adata already prepared.
PSMC3 adata already prepared.
PPRC1 adata already prepared.
BDP1 adata already prepared.
NAA25 adata already prepared.
SLC1A5 adata already prepared.
U2SURP adata already prepared.
CTNNBL1 adata already prepared.
DDX19A adata already prepared.
POLR3C adata already prepared.
TRNT1 adata already prepared.
RNF20 adata already prepared.
GNL2 adata already prepared.
TIMM8A adata already prepared.
MARS2 adata already prepared.
TAF2 adata already prepared.
DHPS adata already prepared.
MPHOSPH10 adata already prepared.
POLR2F adata already prepar

 65%|██████▌   | 1220/1866 [00:01<00:00, 1232.94it/s]

MRPL18 adata already prepared.
TSEN54 adata already prepared.
MRPS18B adata already prepared.
MOCS3 adata already prepared.
COX5B adata already prepared.
RNGTT adata already prepared.
TTC1 adata already prepared.
UBQLN4 adata already prepared.
RPS23 adata already prepared.
RTEL1 adata already prepared.
AARS2 adata already prepared.
ISCA2 adata already prepared.
UBR4 adata already prepared.
SMC4 adata already prepared.
ZMAT2 adata already prepared.
NAA15 adata already prepared.
PDRG1 adata already prepared.
ACTR10 adata already prepared.
USP5 adata already prepared.
SEC61G adata already prepared.
NVL adata already prepared.
LAMTOR4 adata already prepared.
ERCC2 adata already prepared.
MRPS25 adata already prepared.
RBBP5 adata already prepared.
USP8 adata already prepared.
PIK3R4 adata already prepared.
PSME1 adata already prepared.
WDR36 adata already prepared.
UNC45A adata already prepared.
GAB2 adata already prepared.
EWSR1 adata already prepared.
RPL26L1 adata already prepared.
GMPS

 79%|███████▊  | 1468/1866 [00:01<00:00, 1226.99it/s]

NUP160 adata already prepared.
EGLN2 adata already prepared.
CEBPZ adata already prepared.
EXOSC7 adata already prepared.
PTMA adata already prepared.
DPM2 adata already prepared.
RPE adata already prepared.
LUC7L3 adata already prepared.
THOC5 adata already prepared.
HSP90B1 adata already prepared.
CWF19L2 adata already prepared.
NUF2 adata already prepared.
WBP1 adata already prepared.
CHEK1 adata already prepared.
ZCRB1 adata already prepared.
MED10 adata already prepared.
RPA2 adata already prepared.
CCT8 adata already prepared.
PARS2 adata already prepared.
TIMM13 adata already prepared.
HOXC10 adata already prepared.
ACTR6 adata already prepared.
UBTF adata already prepared.
PRKRIP1 adata already prepared.
DCTN3 adata already prepared.
XRN2 adata already prepared.
NPM3 adata already prepared.
SMARCB1 adata already prepared.
SDHAF2 adata already prepared.
RPL7L1 adata already prepared.
PSMD9 adata already prepared.
CENPH adata already prepared.
PSMD7 adata already prepared.
SNUPN 

 92%|█████████▏| 1715/1866 [00:01<00:00, 1225.87it/s]

VHL adata already prepared.
TMSB10 adata already prepared.
NSA2 adata already prepared.
RPS14 adata already prepared.
RPL30 adata already prepared.
TEFM adata already prepared.
SLC25A3 adata already prepared.
PWP1 adata already prepared.
EIF3G adata already prepared.
ARID3A adata already prepared.
MED27 adata already prepared.
DNAJC8 adata already prepared.
C19orf25 adata already prepared.
FAU adata already prepared.
DHFR adata already prepared.
SNRPD3 adata already prepared.
RACGAP1 adata already prepared.
CIAO1 adata already prepared.
HTATSF1 adata already prepared.
PFDN2 adata already prepared.
SMARCE1 adata already prepared.
TRIAP1 adata already prepared.
PRPF6 adata already prepared.
MRPS35 adata already prepared.
UBE2N adata already prepared.
DNMT1 adata already prepared.
COG3 adata already prepared.
CDC23 adata already prepared.
SF3A2 adata already prepared.
NXF1 adata already prepared.
URM1 adata already prepared.
PTBP1 adata already prepared.
CDC26 adata already prepared.
EIF3

100%|██████████| 1866/1866 [00:01<00:00, 1216.52it/s]

CCNB1 adata already prepared.
RPS3 adata already prepared.
RPS4X adata already prepared.
SAMM50 adata already prepared.
ZNF317 adata already prepared.
SLC39A9 adata already prepared.
PGD adata already prepared.
ACTB adata already prepared.
TCOF1 adata already prepared.
BRIP1 adata already prepared.
SFSWAP adata already prepared.
PNO1 adata already prepared.
DNAJC9 adata already prepared.
ABHD11 adata already prepared.
FAM32A adata already prepared.
CCT3 adata already prepared.
NAA10 adata already prepared.
TAF8 adata already prepared.
POLR3H adata already prepared.
CPSF6 adata already prepared.
OGT adata already prepared.
FAM207A adata already prepared.
NDUFB6 adata already prepared.
MYCBP adata already prepared.
SART3 adata already prepared.
RBX1 adata already prepared.
EP400 adata already prepared.
POLR2E adata already prepared.
MCMBP adata already prepared.
SRSF6 adata already prepared.
INTS7 adata already prepared.
CHD4 adata already prepared.
DENR adata already prepared.
GIT2 adat




In [None]:
# Geneformer token dictionary (Ensembl ID -> token), unpickled from the local checkout
with open('/data/home/bty416/scFMs/bin/Geneformer/geneformer/token_dictionary.pkl', "rb") as token_file:
    gene_token_dictionary = pickle.load(token_file)

# Geneformer gene median dictionary, unpickled from the same checkout
with open('/data/home/bty416/scFMs/bin/Geneformer/geneformer/gene_median_dictionary.pkl', "rb") as median_file:
    gene_median_dict = pickle.load(median_file)

# every key of the token dictionary, i.e. the full vocabulary
gene_keys = [*gene_token_dictionary.keys()]

# gene list dictionary: each vocabulary key mapped to True
# (protein-coding and miRNA gene list used for selecting rows for tokenization)
genelist_dict = dict.fromkeys(gene_keys, True)

In [32]:
## Same K562 perturbation path that the prep_pert_data cell assigns (repeated here)
k562_pert_path = '/data/scratch/bty416/scFMs/data/raw_expression_counts/tokenizer_input/perts/replogle_k562'
## Output directory for tokenization; the log below shows "<gene>.dataset" entries saved under it
k562_tokenized_dir = '/data/scratch/bty416/scFMs/data/tokenized_data/k562_pert'

## Mask columns whose sum is zero (same expression as the earlier "non expressed genes" cell)
k562_non_exp_gene = k562_mask.columns[k562_mask.sum() == 0]

## Tokenize the K562 perturbation data.
## NOTE(review): tokenize_replogle_pert_data is defined elsewhere in the notebook; the meaning of each
## positional argument is inferred from the variable names -- confirm against its definition.
tokenize_replogle_pert_data(k562_perts, k562_non_exp_gene, k562_pert_path, k562_tokenized_dir,
                           genelist_dict, gene_median_dict, gene_token_dictionary)

  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.99 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.56 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.50 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.44 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.19 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.13 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.99 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.30 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 21.90 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.51 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.17 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.39 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/KPNA6.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.94 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.28 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 19.72 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 20.43 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.56 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.70 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.03 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.30 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.06 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 26.33 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.52 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.28 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PPARGC1B.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.71 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.44 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.05 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.22 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.37 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.34 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.13 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.58 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 22.80 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 24.67 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:05, 26.03 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 27.31 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NUP133.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.86 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.38 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.53 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.42 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.21 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.15 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.10 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.54 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.00 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.26 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.63 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.58 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MRPL33.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.74 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:18, 23.17 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 22.47 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.12 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 22.20 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.40 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.98 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 27.19 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.03 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.71 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.08 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.40 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPL32.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.96 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.44 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.04 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.47 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.78 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.42 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.95 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.39 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:10, 21.63 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.50 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.47 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.32 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SKA1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.92 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.35 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.01 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.11 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.89 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.20 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.83 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.66 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.23 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.83 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.10 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 25.86 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SPC24.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.41 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.51 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.68 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:13, 28.47 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:13, 25.45 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:13, 23.58 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:12, 22.74 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:11, 22.29 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 24.29 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 25.91 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 26.98 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 25.21 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ANLN.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.74 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.35 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.08 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.21 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.52 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 25.06 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.53 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 22.56 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.23 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 21.97 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.91 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 24.51 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SAE1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.00 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.44 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.90 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.22 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.91 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.95 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.97 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.33 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 23.48 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 25.17 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:05, 26.48 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 27.52 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ERCC2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.03 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.54 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 23.73 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 23.12 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 22.36 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.69 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.62 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.16 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.38 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.09 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.93 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.34 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PPP1CA.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.10 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.30 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.00 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.79 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.20 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.96 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 22.12 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.74 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.77 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 23.05 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 24.87 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 26.31 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DOLK.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.97 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.66 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.86 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.46 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.40 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.44 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 30.44 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 30.51 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 30.30 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 27.08 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:06, 24.49 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:05, 23.53 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ORC6.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.86 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.34 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.97 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.48 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.97 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.78 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.14 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.10 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.78 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.27 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.65 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.90 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NOC2L.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.01 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.54 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.60 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.34 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.28 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.21 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 26.77 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 27.86 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 25.04 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 23.53 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.71 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 24.60 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PSTK.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.65 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 19.98 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.60 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.97 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.87 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.36 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 23.56 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 25.21 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.56 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.60 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 28.32 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.78 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NFATC2IP.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.95 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.40 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.39 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.46 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.24 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.13 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 23.01 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.41 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:10, 21.67 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.70 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.73 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 25.47 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/H3F3A.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.51 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 20.84 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 20.15 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 20.21 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.47 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.99 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 23.38 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 25.09 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.47 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.40 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 28.12 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 25.51 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/WEE1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.13 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.64 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.40 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.71 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.97 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.55 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 29.08 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.44 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.91 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 28.64 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.27 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 26.57 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/YY1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.65 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.25 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.41 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.35 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 22.99 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.20 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.87 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.14 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.96 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.58 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.78 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.60 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MBTPS2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 17.08 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.50 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.34 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.01 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.41 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.22 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.79 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.87 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.57 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 25.94 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.26 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.23 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NDUFB4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.86 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.40 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.90 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 24.88 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.65 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.83 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.68 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.70 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.39 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.14 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.58 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.78 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NOP9.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.73 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.36 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.77 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.81 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.42 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.41 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 23.53 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 25.29 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 24.06 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 23.10 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.57 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 24.47 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/EP400.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.89 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.22 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.07 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.67 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.10 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.26 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.67 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.76 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:10, 21.59 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.43 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.40 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.66 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/FDPS.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.90 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.52 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.29 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.33 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.04 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.38 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 23.21 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.45 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:10, 21.33 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 23.37 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 25.12 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.44 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SDHC.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.81 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.38 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.58 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.53 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.37 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.63 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.30 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.50 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.08 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 23.01 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.85 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.85 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NRDE2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.83 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:18, 23.83 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:15, 26.39 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:13, 27.99 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 28.79 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 29.23 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 29.60 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.62 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.85 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 26.61 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:06, 24.65 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:05, 23.25 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CCT5.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.78 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.27 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.10 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.34 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.18 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:15, 20.30 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.57 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.37 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 23.50 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 25.30 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:05, 26.61 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 27.63 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPL3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:17, 26.00 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:18, 23.44 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 22.65 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.95 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.09 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.59 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.62 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:11, 22.15 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.95 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.40 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 23.62 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 25.49 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MASTL.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.93 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:23, 18.84 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.39 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.49 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.40 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.77 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.64 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.53 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 23.92 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 23.76 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 25.44 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.63 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ANAPC10.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.87 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.37 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.56 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.39 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.96 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.12 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 27.94 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.27 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.76 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 27.85 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 25.61 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:05, 24.17 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/LST1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.51 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.49 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.46 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.12 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:13, 25.81 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:13, 23.78 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:12, 23.03 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:09, 24.95 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:08, 26.46 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 27.41 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 28.10 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 28.77 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ATRIP.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 16.76 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.42 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 23.33 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.78 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.09 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.98 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.60 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.05 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.42 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.51 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.64 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.69 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ATP2A2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.09 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.43 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.27 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.28 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.21 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.46 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.35 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:09, 27.51 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.41 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 28.93 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.37 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.65 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DNM2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.35 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:18, 23.46 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 22.57 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.30 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.96 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.21 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.81 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 24.45 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.22 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.54 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.24 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 24.24 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/HMGN2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.89 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.39 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.94 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.16 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.97 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.25 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.08 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.70 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.14 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 26.14 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 24.30 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.46 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NSMCE2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.26 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.38 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.37 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 29.96 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 28.80 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 28.68 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 29.01 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 29.27 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 29.57 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 29.64 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 29.64 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:12<00:04, 29.71 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NAA35.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.11 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.34 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.44 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:14, 26.01 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:14, 23.91 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:13, 22.61 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 21.96 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:11, 21.63 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:10, 21.40 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 21.38 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.37 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.21 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/LAMTOR3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.56 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.41 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.31 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 29.86 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 29.70 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 29.86 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 29.84 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 29.81 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 29.81 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 29.77 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 29.86 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:12<00:04, 29.69 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/USP19.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.80 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:19, 22.71 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.96 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.18 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 22.11 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.34 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.48 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.16 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.32 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.33 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.91 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.55 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TUBGCP5.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.02 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.49 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.13 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.23 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.00 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.25 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.88 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.65 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 21.74 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 23.80 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 24.45 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 25.99 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SLC35B1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.86 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.24 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.28 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.21 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 22.92 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 22.14 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 22.34 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 24.30 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.80 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.03 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.94 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.56 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PUF60.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.89 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 20.77 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.52 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.68 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.74 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.52 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 29.07 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.04 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.42 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.55 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.70 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.73 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/HNRNPC.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.95 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.30 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.03 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.14 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.29 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.57 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.32 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.08 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.18 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.07 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.68 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.09 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/OGFOD1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.95 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.42 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.95 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.15 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.40 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.78 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.62 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.87 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.77 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.52 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.03 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.05 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PGAM1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.77 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.43 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.42 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.65 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.74 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.53 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 29.06 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.29 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.68 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.83 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.90 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.98 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DCTN4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.97 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.42 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.94 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.79 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.27 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.61 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 22.12 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.07 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.11 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.60 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.05 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 25.00 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/LAMTOR2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.00 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.16 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.91 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.84 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.19 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 25.95 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.11 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.17 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.69 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.19 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 25.83 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.82 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MCM3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.90 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.19 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.87 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.14 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.98 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.01 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.97 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 20.95 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.02 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:15<00:08, 21.08 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 21.06 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.12 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/IARS2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.03 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:23, 18.87 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 20.06 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.32 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.27 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.60 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.62 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.42 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 29.00 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.30 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 26.52 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 25.61 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PRKRIP1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.87 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.46 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.89 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.08 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.70 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 23.97 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.56 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.86 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.83 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 27.23 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 27.99 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.61 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/HDAC3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.79 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.40 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.59 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.55 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.07 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.23 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.16 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.67 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.13 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.38 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.65 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.52 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/OXA1L.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.05 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.38 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.12 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.69 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.67 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.02 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.03 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.73 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.26 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.56 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.71 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.89 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/HDAC7.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.03 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.49 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.52 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.45 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 24.87 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.36 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.50 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.29 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 25.71 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 23.98 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.75 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 22.17 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SART1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.76 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 19.86 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.65 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.98 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.90 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.06 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 23.25 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 24.97 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.10 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.14 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.92 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.06 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/POLR3B.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.94 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.35 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.99 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.26 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.37 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.76 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.57 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 24.14 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.16 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 22.34 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.96 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.76 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ENO1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.95 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.30 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 19.91 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.27 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.30 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.90 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.81 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.58 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 24.01 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 23.73 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.74 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.18 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TAF10.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.09 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.20 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.89 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.64 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.12 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 25.90 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.16 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.99 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 25.60 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 23.89 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.35 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 24.13 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/POLR3F.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.73 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.28 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.77 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.53 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.88 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.45 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.78 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.93 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.84 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.01 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.61 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.39 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TUBE1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.02 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.25 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 23.37 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.86 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.29 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 28.04 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.67 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.10 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.51 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.66 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.85 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.92 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/LAMTOR4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.93 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.34 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.23 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.41 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.04 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.13 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.26 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.00 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.31 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 21.87 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.28 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 24.21 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ZBTB17.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.59 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.03 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.66 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.88 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.04 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.34 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 22.37 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.78 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 21.94 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 23.84 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 25.39 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 26.55 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MRPS23.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.41 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.15 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.80 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.26 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.66 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.63 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.74 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.75 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.36 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 25.86 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.14 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.18 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SLC7A5.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.82 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.28 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.11 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.42 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.56 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.80 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.95 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.78 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.89 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.49 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.95 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 27.81 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SCD.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.66 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.15 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.18 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.12 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.84 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.84 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.61 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 22.57 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.48 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.85 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.93 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.50 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ISY1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.46 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:18, 23.36 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 22.65 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.32 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.79 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.72 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.28 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.87 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.28 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.48 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.67 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.81 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ACTR6.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.93 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.42 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.08 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 20.21 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.52 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.98 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 23.37 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 25.18 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.53 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.62 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 28.52 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.99 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PPP1R12A.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.93 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.35 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.32 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.34 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.24 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.40 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.31 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.88 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.30 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 28.15 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.74 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.11 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PSMC4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.68 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.35 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.80 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.10 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.60 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 23.88 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.55 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.77 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.70 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.44 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.91 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.26 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/EMC7.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.94 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.55 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.06 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.56 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.99 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.70 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.93 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.22 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.81 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.87 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.31 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.25 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SLC7A6OS.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.72 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.03 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.66 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.82 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.86 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:15, 20.42 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 22.76 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 24.57 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.41 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 22.58 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.51 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 24.24 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DDX23.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.67 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 27.93 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:16, 24.94 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.37 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:14, 23.57 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 25.27 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 26.66 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 27.61 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.28 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 28.83 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.33 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 26.28 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SRSF10.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.34 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.06 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.26 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 29.80 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 29.66 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 29.92 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 30.03 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:09, 26.46 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:08, 24.20 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:08, 23.23 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 24.79 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 26.16 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/IGBP1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:17, 26.12 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.67 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:14, 27.49 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 28.90 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 29.18 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 29.69 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 29.87 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 29.80 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:08, 26.50 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:07, 24.55 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 23.36 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:05, 23.10 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TBCC.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.96 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.35 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.90 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.72 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.37 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.20 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.16 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.02 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.03 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.01 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:06, 22.39 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 24.19 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/COG8.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.66 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.32 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.77 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.02 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.64 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.76 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.41 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.77 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.74 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.37 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.97 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.30 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPL26L1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.11 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.20 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.73 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.03 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.06 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.10 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.21 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.46 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.24 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 26.31 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 24.76 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.53 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RBM42.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.71 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.19 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.08 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.21 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.88 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:15, 19.74 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.32 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 22.58 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.48 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.87 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 24.21 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.13 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RAE1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.87 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.34 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.36 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.25 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.08 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.06 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.93 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.30 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.18 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.71 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.95 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.93 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ZNRD1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.05 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.66 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.50 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.70 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.82 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.36 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.98 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.30 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.66 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:07, 26.41 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 24.52 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 25.97 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DRAP1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.11 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.11 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.84 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 20.08 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.40 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.90 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.17 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.14 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.15 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:15<00:08, 21.50 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:06, 23.41 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:04, 25.07 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ZNF131.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:23, 20.29 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 21.21 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.41 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.57 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 22.09 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.24 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.96 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 27.16 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.03 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 25.65 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.87 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 22.40 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TSPYL5.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.55 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.10 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.50 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 29.89 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 29.88 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 29.96 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 29.87 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 29.86 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 29.90 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 29.91 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 29.96 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:12<00:04, 29.88 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CDC16.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.85 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:16, 26.21 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:14, 28.31 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:14, 25.55 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:14, 23.74 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:13, 23.74 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 25.41 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:09, 26.68 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 25.11 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 23.68 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.81 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 22.04 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CRLS1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.81 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.14 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.28 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.19 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.98 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.11 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.86 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.51 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.65 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.22 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 25.58 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.69 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CDK11A.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.72 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.15 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.94 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.27 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.64 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.47 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.71 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 24.93 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.56 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.64 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.31 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 24.23 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TUBGCP4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.97 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.30 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.81 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.19 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.32 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.72 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 22.76 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.21 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 21.80 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 22.31 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 24.22 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 25.70 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPA1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.89 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.42 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.75 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.02 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:17, 20.01 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:15, 20.35 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.44 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:12, 20.54 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.01 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 23.20 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 24.96 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 26.41 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MRPS2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.02 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.43 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.01 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.07 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.92 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.28 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 25.12 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.32 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.48 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.06 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.75 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.20 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/INTS2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.76 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.33 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 30.14 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.42 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.36 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.19 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 30.17 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 30.23 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:08, 26.48 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:07, 24.63 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 26.04 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 27.21 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RBBP5.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.99 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.05 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 19.87 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 23.19 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.31 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.77 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.84 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.52 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 29.06 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.41 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.75 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.86 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MCM5.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.77 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.31 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.98 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.29 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.81 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.63 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.99 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.99 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.60 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.05 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.00 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.70 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SHQ1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.68 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.15 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.83 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.83 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.01 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.95 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.97 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.33 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 23.52 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 25.12 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:05, 26.52 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 27.52 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SARS.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.71 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.37 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.54 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.89 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.11 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.52 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.38 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.83 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.52 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.33 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.66 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.09 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TFRC.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.09 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.52 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.42 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.89 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.99 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.70 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 26.06 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 24.42 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 22.65 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.76 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.64 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.13 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PSMA3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.77 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.30 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.86 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.13 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.94 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.95 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.78 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.84 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.48 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.07 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.97 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 22.19 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/UBE2L3.dataset


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.98 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.18 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.14 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.36 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.02 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.30 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.04 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.56 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 23.86 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.89 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.32 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.89 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PDCD6.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.09 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.55 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.60 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.49 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.28 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.80 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.20 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.04 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.30 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 21.77 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.56 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.11 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/COPS3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.02 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.25 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.98 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.81 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.19 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 26.01 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.14 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.06 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.79 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.22 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.60 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.71 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TOP2A.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.57 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.18 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.54 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.74 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.43 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.27 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.16 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.51 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.60 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.31 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.36 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 27.28 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MRPL17.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.95 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.65 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.55 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.71 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.86 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.51 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.00 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.56 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:08, 25.88 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 24.10 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 22.79 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 22.14 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ARFRP1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.00 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.02 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.86 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.17 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.07 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.56 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.48 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.33 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.14 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.04 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 21.04 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.31 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPS12.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.61 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.25 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.09 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.11 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.86 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.96 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.77 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.35 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 25.46 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 23.76 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.03 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 24.56 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NUBP1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.78 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.30 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.82 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.65 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.05 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.62 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.62 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.47 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 25.95 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 24.13 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.85 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 22.24 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NUFIP1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.72 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.03 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.14 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.03 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.75 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.04 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.84 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.44 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.89 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.13 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 26.03 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.33 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GEMIN8.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.87 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.35 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.00 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.85 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.23 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 25.97 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.23 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.10 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.82 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.08 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.50 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.73 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SFSWAP.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 16.92 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.36 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.35 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.69 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.79 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.77 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.67 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 20.76 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.82 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:15<00:08, 20.89 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:06, 23.09 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 23.80 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NCOA4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.72 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.06 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.80 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.35 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.90 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.51 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.93 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.33 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.13 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.01 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 25.69 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.89 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/EIF2B3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.89 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.39 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.96 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.80 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.09 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.78 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.16 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.98 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.59 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.96 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.31 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.47 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GRPEL1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.00 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.44 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.23 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.27 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.03 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.34 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 25.13 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.79 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.99 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.93 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.31 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.44 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPS15.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.90 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.52 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.12 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.40 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.45 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.83 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.80 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.20 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.26 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.23 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 25.88 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 24.14 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PRPF3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.98 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.25 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.92 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.19 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.57 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 23.85 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.52 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.87 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.81 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.49 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.94 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.37 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/UQCRB.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.75 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.34 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.51 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:14, 25.57 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:14, 23.46 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 25.41 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 26.69 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 27.75 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.50 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.09 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.17 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.38 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NCAPH.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.95 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.41 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.81 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 20.19 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.49 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.09 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 23.47 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 25.23 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.57 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.63 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 28.36 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.87 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NOL10.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.87 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.44 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.49 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.42 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.29 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.49 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.31 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.90 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.42 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.61 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 28.30 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 28.94 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ALG2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.51 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.76 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.57 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.61 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.95 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.66 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.95 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 22.85 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.40 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 22.08 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.69 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.57 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPLP0.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:17, 26.24 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.50 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 30.01 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.49 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.07 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.04 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 30.09 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 30.18 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:08, 26.73 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:07, 24.78 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:06, 23.62 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:05, 23.41 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PARN.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.75 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 19.84 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.46 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.82 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.86 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:15, 20.58 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 22.92 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 24.80 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.62 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 22.71 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.89 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 22.33 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SMU1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.96 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.30 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.86 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.12 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.05 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.98 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.98 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.05 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.02 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:15<00:08, 21.03 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 21.57 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 23.60 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PRELID3B.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.66 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.07 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.64 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.83 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.10 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.27 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 25.10 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.24 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.26 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 27.95 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.34 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.69 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RRP1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.46 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.18 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.50 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:13, 28.35 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 28.89 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 29.37 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 29.59 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 29.65 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 29.89 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 30.03 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 30.01 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:12<00:04, 30.06 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DPH2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.91 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.35 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.43 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 29.92 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.02 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 29.95 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 29.96 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 29.89 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 29.96 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 30.05 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:06, 25.15 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:05, 23.71 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NDC1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.68 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.47 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.75 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.17 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.66 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 23.86 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.76 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 27.00 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.14 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 23.76 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.36 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 24.96 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CEBPZ.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.63 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:23, 18.73 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 19.85 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 20.30 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.35 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:15, 20.41 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.71 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 22.91 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:08, 24.68 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 25.99 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:05, 27.04 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 27.75 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PDRG1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.63 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.01 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.65 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.47 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.36 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.18 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.10 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.04 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 22.28 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 24.08 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.19 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 22.52 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MSRB1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.73 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.21 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.59 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.97 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.97 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.22 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.96 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.51 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.36 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.33 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.62 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.68 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TAF1C.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.89 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.32 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.00 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.21 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.03 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.04 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.30 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.55 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.25 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 23.97 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.75 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.84 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TPT1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:26, 17.47 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:23, 18.94 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:05<00:20, 19.43 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 19.65 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:08<00:17, 19.58 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:15, 19.58 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:11<00:15, 18.48 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:13<00:13, 18.80 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:14<00:11, 19.04 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:16<00:09, 19.20 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:17<00:08, 19.35 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:19<00:06, 19.43 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MRPS25.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.01 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.54 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.09 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.22 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.09 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.41 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.24 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.91 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.41 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 26.41 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 24.50 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.18 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ORC3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.07 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.60 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.04 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.40 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.81 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.59 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.47 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.38 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 22.07 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 24.11 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 25.64 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 27.06 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/KRT10.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.82 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.46 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.83 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 20.13 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.82 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.33 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 22.71 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 22.61 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.59 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 26.10 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.32 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.14 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/YARS2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.96 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.45 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.28 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.45 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.56 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 23.88 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 22.62 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.94 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.61 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.34 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.87 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.78 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PMF1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.93 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.37 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.76 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.12 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.95 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.80 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.82 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 20.85 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.09 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 23.20 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:06, 22.61 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 22.05 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MCM4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.93 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.44 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.71 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.05 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.97 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.43 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 23.60 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 25.14 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.58 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.55 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.34 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.00 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DONSON.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.00 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.65 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.12 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.25 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 24.62 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.79 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.56 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.57 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.37 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.93 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.52 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.78 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CUL1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.44 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.14 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.76 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 20.62 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.67 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.98 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 23.26 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 25.04 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.43 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.33 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.91 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 25.69 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MED1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.48 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.08 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.17 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 24.97 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.61 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.52 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.35 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.77 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.17 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.57 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.86 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.80 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SNF8.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.92 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.91 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.85 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:13, 28.57 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 28.93 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 29.18 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 29.50 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:09, 25.97 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:08, 24.21 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:08, 23.08 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 22.42 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:05, 24.27 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ZNHIT6.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.06 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.58 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.72 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.72 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.47 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.65 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.52 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.14 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.62 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.77 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.92 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 28.70 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DYNC1I2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.50 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.68 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.68 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:14, 26.12 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:14, 23.95 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:13, 22.80 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:11, 24.91 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:09, 26.41 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 24.43 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 23.34 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.61 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 22.66 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/WDR75.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:02<00:29, 15.60 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:23, 18.63 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.22 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:16, 23.09 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:13, 25.08 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 26.56 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.79 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.80 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.94 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.87 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.29 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.57 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPS4X.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.87 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.32 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.49 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.54 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.27 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.39 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.07 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.62 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:10, 21.19 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.02 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.01 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.15 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/HUWE1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.71 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.29 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.38 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.40 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.23 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.34 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.16 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.59 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 23.83 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.92 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.19 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.21 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SEC63.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.99 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.57 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.08 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.35 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.25 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.12 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.45 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.66 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.65 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 22.17 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 22.11 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 24.01 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CHCHD2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.91 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.41 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.80 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.09 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.11 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.19 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 23.38 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 25.23 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.69 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.55 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 28.48 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.99 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CNOT2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.03 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.59 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.41 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.42 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.18 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.38 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.22 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.87 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.35 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.66 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.75 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 30.04 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NVL.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.59 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.02 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.77 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.19 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.70 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.53 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.93 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.91 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.60 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.13 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.54 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.71 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DR1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.01 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.61 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.63 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.50 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.03 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 28.16 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.91 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.51 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.76 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.76 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 30.02 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 30.05 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CDCA5.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.94 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.21 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.75 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:16, 22.14 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:13, 24.42 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 26.11 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.30 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.17 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.78 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.24 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.24 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.76 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/POLR1A.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.86 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.23 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.90 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.07 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.98 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.96 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.25 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.04 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.65 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 23.62 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 25.31 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 26.70 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/BRCA2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.81 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.09 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.76 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.87 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 22.92 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.20 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 22.24 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.92 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.58 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.31 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 21.04 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.04 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SSU72.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.50 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.24 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.67 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.19 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.98 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:15, 19.90 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.28 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:12, 20.34 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.48 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:15<00:09, 20.58 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 20.95 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 23.09 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPS9.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.82 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.17 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.83 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.98 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.12 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.00 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.99 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 20.90 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.88 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:15<00:08, 20.99 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 21.42 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.47 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ZNF574.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.93 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.36 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.94 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.25 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.18 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.08 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.03 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.39 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 23.60 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 25.06 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:05, 26.44 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 27.40 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/EIF2B4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.36 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 19.85 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.43 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 20.53 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 22.94 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.73 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.18 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 27.17 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.84 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.36 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.71 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.93 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DSN1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.57 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.59 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.70 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.21 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 29.88 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.03 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 30.04 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:09, 26.66 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:08, 24.58 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:08, 23.22 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 23.11 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:05, 23.90 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/HIST1H2BC.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.78 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.29 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.87 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.72 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.06 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 25.87 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.09 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.08 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.69 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.20 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.40 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 26.14 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/FAM133B.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:26, 17.80 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.42 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.73 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.87 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.88 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.26 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 25.12 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.69 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.85 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.33 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.24 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 25.86 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MED31.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.10 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.20 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.77 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.40 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.86 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.75 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.02 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.01 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:10, 21.52 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.18 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 20.95 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 20.87 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SETD2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.90 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 21.09 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.68 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.92 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:14, 24.06 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.84 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.48 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 24.40 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 25.93 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 27.10 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 27.82 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.42 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/AP2S1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.03 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.42 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.02 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.85 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.19 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 25.93 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.12 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.20 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.70 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 23.54 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.85 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.34 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PTK2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.70 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.27 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.66 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.74 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.22 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.00 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 23.02 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.33 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.26 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.05 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.48 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.47 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DCTN3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.91 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.34 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.61 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.63 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.44 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.59 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.33 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.87 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.42 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 27.43 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 24.96 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:05, 23.57 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/EIF3I.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.71 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.19 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.76 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.10 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.95 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.97 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.91 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 20.94 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.98 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:15<00:08, 20.73 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 20.86 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 23.03 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/POLR3D.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.06 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:23, 18.91 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 20.03 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 20.44 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.65 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.82 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.89 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.18 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.01 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 26.40 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.32 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.10 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/EXOSC8.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.95 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:19, 22.88 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:15, 26.01 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 27.67 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.29 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:13, 23.81 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.61 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 22.71 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 24.59 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 24.89 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 26.35 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.30 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PKM.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.70 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.48 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.07 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.14 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.06 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.36 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.22 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.88 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.35 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 26.39 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 24.53 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.63 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ADRM1.dataset


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.80 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.47 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.63 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.73 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.54 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.70 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.23 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.23 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.26 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.80 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.10 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.92 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/C9orf78.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.15 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.40 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.70 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.35 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.23 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 28.54 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 29.00 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 29.40 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 29.51 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 29.84 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 29.97 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:12<00:04, 29.89 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NOL8.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.89 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.43 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.65 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.63 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.37 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.57 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.47 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.06 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.46 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.62 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.89 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.97 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MED4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.78 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:18, 23.85 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:15, 26.79 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.08 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:14, 23.19 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.54 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.56 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 24.52 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 26.13 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 27.30 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 25.00 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.77 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/YPEL5.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.78 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.34 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.90 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.59 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.97 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.42 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.80 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 27.07 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.94 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.44 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.90 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.18 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GEMIN4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.93 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.51 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.97 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.34 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.08 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.13 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.68 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.38 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.29 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.28 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 21.72 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 23.81 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/IMP4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.95 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.31 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 22.62 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.20 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.35 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.25 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.57 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.32 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 26.68 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.69 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 28.24 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.84 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PSMB2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.65 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.02 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.63 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.29 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.30 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:15, 20.33 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.88 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.09 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.81 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 26.13 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.03 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.80 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MRPL10.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.91 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.41 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.19 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.39 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.64 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 23.91 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.59 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 27.12 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 25.29 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 23.97 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 25.62 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.87 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CRCP.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.04 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.28 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.78 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.62 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.98 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 25.89 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.07 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 22.93 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.15 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.72 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.53 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.33 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NBEAL1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.80 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.33 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 19.98 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.74 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.92 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.24 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 22.62 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.04 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 21.74 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.68 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.76 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 25.51 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PSMA2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.84 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:18, 23.72 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:15, 26.48 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.62 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:14, 23.24 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.45 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.60 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 24.52 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 26.00 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.53 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.34 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.09 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PSMD9.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.61 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 19.95 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.89 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.81 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.59 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.02 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.88 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.42 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.90 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.24 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.52 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.51 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/BRF1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.82 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.38 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.83 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.12 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.15 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.77 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.98 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 25.77 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.95 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.95 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 25.20 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.66 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CHMP2A.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.94 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.41 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.94 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.11 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.40 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.83 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.53 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.99 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.31 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.04 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.73 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.10 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ANKS6.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.27 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:18, 23.12 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.36 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.27 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.06 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.25 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.16 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.87 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.28 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.52 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.77 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.83 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NISCH.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.69 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.16 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.82 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.01 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.54 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.59 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.80 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 22.79 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.17 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.69 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.55 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.33 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/KIAA1143.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.84 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.00 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 19.49 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 20.17 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.35 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:15, 20.45 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.67 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 22.89 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.68 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 23.56 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.65 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 22.25 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/INTS3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.05 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.44 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.01 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 24.84 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.57 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.64 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.90 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.43 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 22.98 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.89 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 25.26 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.48 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SNRPB.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.68 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.30 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.17 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.17 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.03 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.22 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.96 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.92 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 24.06 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.71 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.33 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.21 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/EIF6.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 16.89 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.35 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.35 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.88 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.09 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.41 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 25.21 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.71 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.73 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.42 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.84 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.22 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TUBG1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.71 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:18, 23.37 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.43 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.21 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:14, 22.75 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.21 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 21.78 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:12, 20.62 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.76 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.35 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.54 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 25.23 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GNL3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.01 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.11 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.59 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.98 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.20 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.52 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 25.33 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.73 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.84 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.55 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.01 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.43 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PSMG4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.86 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.35 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.04 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.15 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.02 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.21 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.20 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.21 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 23.80 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.91 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.24 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.77 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/EIF3A.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.01 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.39 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.19 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 23.04 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.12 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.45 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.50 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.32 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.84 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.16 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.34 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.38 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PALB2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.91 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.27 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.98 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.19 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.07 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.46 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 23.71 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 22.76 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.30 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.98 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.18 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 24.90 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PDCD11.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.89 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.28 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.54 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.54 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.37 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.54 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 22.00 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.72 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.79 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.44 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.15 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.00 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SPCS3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.77 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.68 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.61 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:14, 25.82 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:14, 23.89 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:13, 23.28 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:11, 25.23 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:09, 26.61 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 27.73 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 28.49 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.19 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.52 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/BCAS2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.82 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.17 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.49 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.42 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.23 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.19 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.56 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.49 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.09 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.95 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.91 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 25.51 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/LONP1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.60 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.16 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.74 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.40 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.81 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.65 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.95 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.74 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.43 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.96 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.11 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.41 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPL15.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.99 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.41 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.64 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.05 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.17 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.52 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 25.31 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.60 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.57 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.37 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.90 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.18 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PPP1R8.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.20 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.92 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:14, 27.87 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 29.27 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 29.47 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 29.90 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 30.04 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 30.23 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 30.23 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 30.33 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 30.23 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:12<00:04, 30.28 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/THOC7.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.06 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.34 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.05 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.35 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.66 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 23.89 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.73 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 27.06 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.97 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 27.67 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.03 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.78 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SF1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.12 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.45 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.13 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.73 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.45 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.86 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.11 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.84 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 27.05 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 27.94 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.67 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 29.15 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/EIF5.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.81 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.30 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.40 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.32 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.23 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.60 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 22.12 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.54 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.51 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 23.37 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.64 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.57 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DCLRE1B.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.77 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.30 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.68 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.65 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.36 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.42 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 26.76 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 27.84 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.44 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 28.97 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.35 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.29 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CDC26.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.99 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.55 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.12 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.80 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.24 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 26.12 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.39 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.31 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.94 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.31 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.64 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.94 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RBM25.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.93 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.09 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.68 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.14 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.03 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.01 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.02 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.00 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.96 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:15<00:08, 21.04 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 20.94 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:18<00:05, 21.02 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/POLR2E.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.74 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.20 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.12 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 23.07 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.09 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.40 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.55 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.36 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.95 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.30 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.50 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.58 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ARCN1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.98 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.58 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.31 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.37 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.15 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.45 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 25.57 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 26.87 examples/s][A
Map (num_proc=16):  81%|████████▏ | 407/500 [00:17<00:03, 24.97 examples/s][A
Map (num_proc=16):  88%|████████▊ | 438/500 [00:18<00:02, 23.80 examples/s][A
Map (num_proc=16):  94%|█████████▍| 469/500 [00:20<00:01, 22.89 examples/s][A
Map (num_proc=16): 100%|██████████| 500/500 [00:21<00:00, 22.78 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MRPL49.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.85 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.43 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.05 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.74 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.72 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.48 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.00 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.20 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 21.87 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 23.91 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 25.41 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 26.68 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ZWINT.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.93 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.18 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.61 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.39 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.02 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.44 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 26.96 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 27.87 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.52 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.02 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 25.90 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:05, 24.11 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/BRCA1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.85 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.34 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.95 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:16, 21.98 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.27 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.41 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.67 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.05 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:10, 21.58 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.43 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.85 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.82 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/UBR5.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.69 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.09 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.24 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.13 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.83 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.08 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 27.98 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.55 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.94 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.31 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.42 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.59 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RNF8.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.53 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 19.95 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.08 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.83 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 24.48 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 25.90 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.16 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.69 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.41 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.63 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.13 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.10 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SNW1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.99 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.38 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.49 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.60 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.30 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.70 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.27 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.21 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 21.84 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 23.69 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 25.49 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 25.94 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/FCF1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.64 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.24 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.76 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.45 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 24.70 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 26.25 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.28 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.11 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.55 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.92 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.30 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.55 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PSME1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.96 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.19 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 23.27 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.88 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.22 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 28.14 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:11, 25.31 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.78 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 23.86 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 25.32 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 26.51 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 27.66 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ANKRD49.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.92 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.64 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.32 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.62 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.78 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 25.36 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.72 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:11, 22.54 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 24.33 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 26.12 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 27.23 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.02 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/OGT.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.65 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.60 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.74 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.38 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.25 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:11, 26.43 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:11, 24.30 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:10, 23.12 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 23.04 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 24.80 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 26.23 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 27.29 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPSA.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.95 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.55 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.12 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.81 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.32 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 26.09 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.39 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.33 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.54 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.76 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.55 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.10 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CCT8.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.77 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.26 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.52 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 23.23 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.33 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 23.87 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.88 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.17 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 21.79 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.54 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.74 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.78 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CEP97.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.00 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.12 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.98 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.25 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.79 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.64 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.49 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.72 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.80 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.47 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.71 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 26.55 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NOP2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.05 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.52 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.57 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.54 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.42 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.54 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.60 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 24.26 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 23.23 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.51 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.16 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.88 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPL13A.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:26, 17.51 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.26 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.28 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.27 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.79 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.55 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.00 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.95 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.59 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.12 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.41 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.72 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/THOC5.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.81 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.40 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.96 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.22 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.39 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.73 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.52 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 24.16 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 21.93 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.56 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.44 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.49 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/METTL17.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.99 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.48 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.51 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.62 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.44 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.57 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.38 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.83 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.28 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.59 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 30.00 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 30.15 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/UBA52.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.68 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.38 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.88 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.77 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.56 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.62 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.54 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.86 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 24.36 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:08, 23.15 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 25.14 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.29 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MRPS27.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.99 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.23 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.22 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.69 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.43 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:15, 20.26 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.55 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 22.74 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 22.12 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.83 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:06, 22.15 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 23.77 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PCBP1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.02 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.45 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.90 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.22 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.39 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.78 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.51 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.72 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.88 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.50 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.07 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.38 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SNRPD3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.57 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.15 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.84 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.06 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.57 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.46 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.84 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.85 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.50 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.90 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.40 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.66 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TCP1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.95 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.45 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.60 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.97 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.73 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.21 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 25.02 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.47 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.43 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.24 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.88 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.28 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DENR.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.88 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.39 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.93 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.77 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.49 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.58 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.34 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.93 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.40 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.54 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.77 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 28.53 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPL31.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.96 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.34 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.18 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.23 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.91 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 23.96 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.72 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 22.86 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.71 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 26.14 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.08 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.03 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NUP93.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.98 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.56 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.17 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 23.12 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.23 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.64 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 27.94 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.51 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.01 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.18 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.55 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.79 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PNPT1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.82 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:18, 24.01 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:15, 26.70 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:13, 28.54 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 28.91 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 25.33 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:11, 23.76 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.02 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 24.80 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 26.26 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 27.39 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 24.23 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MMS19.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.75 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 19.99 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.71 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.49 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.51 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.32 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.29 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.41 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.06 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 26.56 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 24.63 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.41 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/LAS1L.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.01 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.33 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.62 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.39 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.12 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.33 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 25.04 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.50 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.98 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.71 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.12 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.12 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPL7A.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.84 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.25 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.75 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.31 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.20 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.42 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.38 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.93 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.38 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 28.31 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 28.96 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.24 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DDX54.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.00 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.47 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.69 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.66 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.35 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.65 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.32 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 22.58 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.10 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.05 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.82 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.67 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SUPV3L1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 16.73 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.20 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.29 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.21 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.93 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.91 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.02 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 20.99 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 21.77 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 23.85 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 25.53 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 26.59 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/UBC.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.07 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.53 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.77 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.66 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.40 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.47 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.67 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.76 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 21.96 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 21.68 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.79 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.81 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NOP10.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.96 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.33 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.92 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.53 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.98 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.10 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 22.48 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.14 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.09 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.10 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 25.80 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.00 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SRP68.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.86 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.45 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.46 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.36 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.20 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.22 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.92 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.18 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 21.80 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.89 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.90 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 25.50 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/EIF2B5.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.75 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.38 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.97 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.04 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.05 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.77 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 23.94 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 24.55 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.14 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.35 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 25.27 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.83 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/UBE2M.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.95 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.55 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 22.89 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.59 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.07 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 28.03 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.81 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.35 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.67 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 26.67 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 24.56 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:05, 24.41 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/IPO9.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.02 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.48 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.20 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 24.91 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.44 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.60 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.56 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.08 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.43 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.59 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.80 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 30.01 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/KRI1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.98 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.30 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.95 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.43 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 22.60 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.64 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.36 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 22.59 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.33 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.31 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 25.77 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.98 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/KIF20A.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.03 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.53 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.92 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 24.81 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.50 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.19 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.99 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 22.90 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 24.76 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 26.21 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 27.48 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 25.25 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/LETM1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.80 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.75 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:15, 25.35 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.67 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:15, 22.64 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 22.05 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 21.69 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.47 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:10, 21.63 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 23.55 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 25.17 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.49 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TADA2A.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.89 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.53 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 20.06 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 20.62 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.18 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.59 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 22.96 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.30 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 22.08 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.46 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 21.29 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.27 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MIS18A.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.08 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.49 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.89 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.19 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.13 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.07 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.38 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.55 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.28 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.64 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.96 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.05 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/UBA2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.99 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.44 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.05 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:16, 21.98 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.32 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 26.08 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.37 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.21 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.90 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.24 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.49 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.70 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PPAN.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 16.72 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.25 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.20 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.34 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.34 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.38 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 22.31 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.21 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.85 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.57 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.77 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.52 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MMGT1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.88 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.32 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.82 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.45 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.86 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.08 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 22.34 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.21 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.35 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.12 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.43 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.38 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/C7orf50.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.89 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.57 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.16 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.40 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.94 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.24 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 23.17 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.45 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.18 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.08 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.11 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.39 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MCM2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.71 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.28 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.71 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.35 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.69 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.50 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.77 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 24.52 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.21 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.29 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.83 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.42 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TARS2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.72 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.34 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.15 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.54 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.80 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.54 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 29.01 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 27.95 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.72 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.21 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.54 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.71 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SUGP1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.63 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.33 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.28 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.29 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.16 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.43 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.30 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.90 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.39 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.74 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.91 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 30.07 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPL4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.79 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.48 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.26 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 23.09 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 22.14 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.21 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.17 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.65 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 26.81 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 27.79 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.49 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.29 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TTI1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.73 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.25 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.04 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.50 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.90 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.72 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.92 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.92 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.55 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.15 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.41 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.57 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CDK1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.71 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.38 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.03 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.45 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.91 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.72 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.08 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.00 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.61 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 26.05 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.30 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.24 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ATF4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.84 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.33 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.23 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.43 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.66 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 25.21 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.51 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:11, 22.45 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 24.35 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 23.51 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.58 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.04 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PPWD1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:22, 20.55 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:16, 25.67 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:14, 27.85 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 29.03 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.95 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 23.97 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.99 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 22.55 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 22.20 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 21.75 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:07, 21.58 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.33 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/EIF1AX.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.02 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.48 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.06 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.19 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.06 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.94 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.97 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.14 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 23.32 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 25.13 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.70 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 22.75 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RAC3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.72 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.05 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 19.46 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 20.18 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.24 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:13, 22.70 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 24.59 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.49 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.57 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 24.31 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.53 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.59 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/AHCTF1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.59 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.32 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.60 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.46 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.28 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.47 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.46 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.41 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 21.96 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 21.69 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.33 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.37 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DNTTIP2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.96 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.41 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.26 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.57 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.40 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.53 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.47 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.24 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.22 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 23.35 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 25.16 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 26.52 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MRPS35.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.91 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.55 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.74 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.95 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 22.85 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 22.03 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 22.04 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.95 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.27 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 22.50 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.32 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 24.20 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MCRS1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.00 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.52 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.05 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.44 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.85 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.71 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.06 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.98 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.73 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 27.74 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.57 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 26.05 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/HNRNPH1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.81 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.33 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.44 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.31 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.13 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.29 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 25.21 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.36 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.70 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.72 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.59 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.01 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/UBA1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 16.87 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.26 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.45 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.60 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 22.61 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.98 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.63 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.30 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.12 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.36 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.34 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 25.17 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NEDD8.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.61 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.23 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.75 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.80 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.79 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.06 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 23.26 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.96 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.66 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 26.77 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.81 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.25 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DTL.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.93 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.49 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.89 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.39 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.82 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.63 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.03 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.94 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.74 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.09 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.46 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 26.18 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CNN2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.97 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 20.79 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.60 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.85 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 28.00 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 25.53 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.91 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.26 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 24.99 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 26.40 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 27.56 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.32 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/EIF3E.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.83 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 19.98 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.42 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.06 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.46 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.59 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.28 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.08 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.00 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 20.91 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 21.50 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 23.40 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TMEM258.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.40 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.11 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.82 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.18 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.28 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.52 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 25.22 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.62 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.69 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.38 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.59 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.98 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RRP7A.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.78 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.33 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.74 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.54 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.33 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.07 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 23.01 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.33 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 21.72 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.52 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 22.10 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 24.03 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PPP2CA.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.91 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.03 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.77 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 20.15 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.91 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.24 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 22.76 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.23 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 21.89 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 23.77 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.87 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 22.18 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SPC25.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.85 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.41 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.83 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.96 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.92 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.79 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.22 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.28 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.92 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 26.47 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.18 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.27 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/POP7.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.84 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.21 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.87 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.87 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.28 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 25.96 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.16 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.14 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 25.38 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 23.68 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.06 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 24.65 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RRN3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.68 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.41 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.61 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.54 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.34 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.42 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.39 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.94 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.41 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.71 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.97 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 26.72 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ABCF1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.57 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.12 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.71 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.47 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.49 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:15, 20.47 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.55 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 20.68 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.79 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:15<00:08, 20.94 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:06, 23.11 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:04, 24.83 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NAT10.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.86 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 21.28 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.75 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.77 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 22.72 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.61 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.63 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.15 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.29 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.16 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.70 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.19 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/HIST1H2BN.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.96 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.37 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.47 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.42 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.32 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.49 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.33 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.88 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.32 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 26.45 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 24.49 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.36 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MRPL16.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.21 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 27.96 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:14, 27.53 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 28.89 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 29.15 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 29.41 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 29.58 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 29.72 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 29.86 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 29.93 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 29.99 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:12<00:04, 30.00 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DHFR.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.56 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.28 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.88 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.93 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.79 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.88 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.18 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.46 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.16 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 26.46 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.26 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.38 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RRP15.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.86 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.43 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.04 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 24.86 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.54 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.71 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.54 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.15 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.55 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.81 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.97 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.97 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PPIL2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 16.91 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.45 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.08 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.48 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.40 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.09 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.13 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.10 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 23.25 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 22.52 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 22.05 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 22.00 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/LSG1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.87 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.73 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.57 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.04 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.10 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.86 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.02 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.09 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.18 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:15<00:09, 20.43 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 20.59 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:18<00:05, 20.77 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PSMC3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.98 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.39 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.31 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.37 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.14 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.37 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 25.17 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.82 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.78 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.30 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.28 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 25.76 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NOP14.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 17.20 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.48 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.49 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.98 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.04 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.27 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 23.69 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 22.69 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 22.20 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.87 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.43 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 24.35 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ZNF207.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.16 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.38 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.21 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.16 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.00 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.22 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 26.72 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 27.77 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.59 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.07 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.39 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.60 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/KTI12.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.05 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.34 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.00 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.22 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.33 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.59 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.41 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.79 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.69 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.38 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.83 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.24 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SNRNP200.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.66 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.38 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.90 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.05 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.71 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 23.99 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.65 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.97 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.99 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 23.58 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.66 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.04 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MRGBP.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.87 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.31 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 19.88 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 23.24 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.19 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.66 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.68 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.49 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 29.03 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.35 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.50 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.59 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MED14.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.70 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.32 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.80 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.11 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.26 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.57 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.38 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.76 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.63 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 27.11 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 25.16 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.72 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PLK4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.58 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.18 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.35 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 29.80 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 29.63 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 29.84 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 29.76 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 29.90 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:08, 26.67 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:07, 24.75 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:06, 23.36 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:05, 23.36 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SEC16A.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 17.15 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.63 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.61 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.54 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.29 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.42 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 26.83 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 27.82 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.60 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 26.25 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 24.42 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.37 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CNIH4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.90 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 21.16 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.83 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.74 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.98 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.61 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 29.10 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.37 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.15 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 28.76 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.24 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.50 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RAD9A.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.95 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 20.85 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.51 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.48 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 22.39 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.97 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.84 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.90 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.50 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 26.71 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.70 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.27 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/VPS28.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.89 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.02 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.74 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.57 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.95 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.66 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.37 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 22.91 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.17 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.37 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 22.11 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.85 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/HARS.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.68 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.32 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.94 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.50 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.30 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.22 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.23 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.08 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.97 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 20.99 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 20.97 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.27 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MRPS34.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.93 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:18, 23.51 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 22.73 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.28 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.35 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.62 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.73 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.44 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.97 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 27.72 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 25.27 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:05, 23.86 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MRPL2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.86 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.22 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 19.54 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 20.35 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.31 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:15, 20.49 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.35 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.44 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.00 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 26.41 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.37 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.11 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SMARCB1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.02 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.40 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.60 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.51 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.34 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.60 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.51 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 24.95 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 23.50 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 21.86 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:07, 21.94 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.94 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MPHOSPH10.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.90 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.32 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.92 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.21 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.11 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.66 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 23.81 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 25.46 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.83 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.96 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 28.60 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 25.83 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPS20.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.89 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.42 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.76 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.13 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.09 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.07 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.09 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.33 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 23.51 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 25.20 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:05, 26.59 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 27.42 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GTF3A.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.56 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 19.88 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.23 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 20.62 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.77 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:13, 22.21 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 24.11 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 25.77 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.98 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.88 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 28.44 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 25.44 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CTNNBL1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.06 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.61 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.39 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.35 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.14 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.10 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 23.09 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.51 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 21.97 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 21.89 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.94 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 25.58 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPS3A.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.47 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.58 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.51 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.47 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.34 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.19 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 30.11 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 30.29 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 30.32 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 30.35 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 30.33 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:12<00:04, 30.32 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPS24.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.39 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.17 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:17, 23.37 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.83 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:15, 21.85 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.27 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.10 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.76 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 26.77 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 27.63 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.37 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.56 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PSMD3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.01 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.58 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.44 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.41 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.28 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.42 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.19 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.86 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.35 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.42 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 28.30 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 28.97 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MRPL34.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.89 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.40 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.77 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.07 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.07 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.78 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.10 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 25.61 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.93 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.66 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.70 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 25.31 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NCBP2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.80 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.24 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 23.36 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.76 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.02 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 24.85 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.26 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.03 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 26.31 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 24.58 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.23 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 22.57 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CPSF6.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.00 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.20 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.89 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.05 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.68 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.53 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.90 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 22.78 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.29 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 22.49 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.49 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 25.21 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SNAPC2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.01 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.40 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.00 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.58 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.02 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.80 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.02 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.00 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.29 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.91 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.89 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 25.35 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ZNF317.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.87 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.20 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.68 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.95 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.97 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.74 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.88 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:12, 20.66 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.79 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:15<00:08, 20.88 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 20.89 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:18<00:05, 20.96 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SUPT6H.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.13 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.54 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.92 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.13 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.06 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.03 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 22.43 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.24 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 21.77 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.53 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 21.56 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 23.48 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TNPO3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.55 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.56 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.33 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 20.58 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.51 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:15, 20.51 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.47 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:12, 20.42 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.12 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 23.11 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 24.68 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 26.02 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ROMO1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.33 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.52 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.70 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.32 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.38 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.41 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 30.37 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 30.31 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:08, 27.09 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:07, 24.96 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:06, 23.70 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:05, 22.82 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/COPB2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.60 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:18, 23.47 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 22.69 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.20 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.20 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.54 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 25.18 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 24.03 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.67 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.22 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.80 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.05 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DCTN1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.65 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.31 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.97 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.70 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.21 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.86 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 22.29 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.68 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:10, 21.65 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 23.75 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.63 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 22.12 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GTF2B.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.53 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.21 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.16 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.02 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.81 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.09 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 27.96 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.60 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.02 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.35 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 25.80 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:05, 23.91 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RIOK1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:26, 17.83 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.37 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:05<00:21, 18.76 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:19, 19.25 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:08<00:17, 19.44 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:15, 19.66 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:11<00:14, 19.77 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:12, 19.83 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:14<00:10, 19.90 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:16<00:09, 19.81 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:17<00:07, 19.78 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:19<00:06, 19.93 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CENPT.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.98 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.31 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.83 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.98 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.90 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.97 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.02 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.03 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.01 examples/s][A
Map (num_proc=16): 100%|██████████| 500/500 [00:18<00:00, 26.91 examples/s][A

Saving the dataset (0/1 shards):   0%|          | 0/500 [00:00<?, ? examples/s][A
Saving the dataset (1/1 shards): 100%|██████████| 500/500 [

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/FOXL2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.64 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.15 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.72 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.40 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.75 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.48 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.87 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.32 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:10, 21.69 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.81 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.69 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 25.25 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TSFM.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.93 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.37 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.74 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.63 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.52 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.47 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.40 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.97 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.44 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.63 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 26.57 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:05, 24.68 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NOL11.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.04 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.24 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.00 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.32 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.22 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.69 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.96 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 22.86 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.36 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 23.88 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.10 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.48 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/METAP1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.72 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.22 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.64 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:16, 22.06 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.35 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.14 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.02 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 22.98 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.22 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.74 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.36 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.25 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NOL7.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.67 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.14 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.71 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.97 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.91 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.13 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 23.35 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 25.10 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.36 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.39 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 28.10 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.70 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPL37.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.83 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.38 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.93 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.19 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.97 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.93 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.04 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.49 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 23.51 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 25.23 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:05, 26.55 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 27.53 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PPP1R10.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.55 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:23, 18.69 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.36 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.51 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 22.78 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.18 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.66 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.39 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.11 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 20.99 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 21.36 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 23.45 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NUP160.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.01 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.56 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.97 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.04 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.27 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.59 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.52 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.24 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 26.81 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 27.89 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.54 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.13 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CENPW.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.83 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.19 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.85 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:16, 22.09 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.35 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 25.93 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.21 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.08 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 25.48 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 23.74 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.42 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 25.13 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MRPL13.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.99 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.44 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.05 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.36 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.81 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.63 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.98 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.84 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.46 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.12 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.45 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.65 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SYMPK.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.97 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.31 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.31 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.65 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.14 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.38 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.32 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:09, 25.21 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 23.56 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:08, 22.81 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.42 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 22.16 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MLST8.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.91 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.03 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.83 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.91 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.77 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.11 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.90 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.28 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 23.76 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.82 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.96 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 24.77 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/USP37.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.59 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.22 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.76 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:16, 21.93 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.25 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 25.93 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.44 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.20 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.84 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.59 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.06 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.17 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/VPS25.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.84 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.31 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 19.80 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 23.13 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 24.89 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 26.22 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.61 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.34 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.89 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.20 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.54 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 26.13 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MIOS.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.02 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.57 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.04 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 24.91 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.63 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.84 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.49 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.78 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.14 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 24.74 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 23.53 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.39 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NSUN4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.95 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.47 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.31 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.62 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.74 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.54 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 29.09 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.51 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.64 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.72 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 26.25 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:05, 24.28 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPL7L1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 16.82 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.31 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.35 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.56 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.50 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.86 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.88 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.65 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 29.16 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.41 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.66 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.85 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MKRN1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.07 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.47 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.91 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.20 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.28 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.24 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 22.76 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 24.78 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.27 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.48 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 28.34 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.85 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PPP1R11.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.73 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.32 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.88 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.99 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.70 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.74 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.61 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.93 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.76 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.52 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.90 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.35 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GATA1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.59 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.00 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.80 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.92 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.65 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.97 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.81 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.32 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.84 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.79 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.13 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.10 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PWP1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.70 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.06 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 20.13 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.46 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.87 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.43 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.13 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.02 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.08 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.64 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.52 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.40 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NUP43.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.56 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.05 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.80 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.03 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.89 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.10 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 23.32 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 25.06 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.54 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.16 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.87 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.89 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DNAJA3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.03 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.21 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.62 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.18 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.71 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.55 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.96 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.94 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.67 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.23 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.65 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.75 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/KANSL2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 16.87 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.29 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.32 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.31 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.81 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.64 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.00 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 24.57 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.33 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.58 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 22.00 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.70 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CRKL.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.64 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.15 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.76 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.96 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.85 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.91 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.41 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 22.62 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.57 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.99 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.12 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.90 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CENPC.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.92 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.27 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.86 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.18 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.83 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.92 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.80 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 20.83 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.06 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 23.27 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 24.99 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 26.45 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/EXOSC10.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.91 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.29 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.81 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.11 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.68 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.75 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.68 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.98 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.90 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.63 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.87 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.52 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/HSCB.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.85 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.45 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.49 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.49 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.29 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.57 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.20 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:12, 20.31 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.72 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:15<00:08, 20.91 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 20.94 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 20.85 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DAP3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.46 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:18, 23.36 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:15, 26.22 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:13, 27.78 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 28.66 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 29.09 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 29.30 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:09, 26.14 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:08, 24.22 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 23.32 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 24.93 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 26.18 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GRB2.dataset


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.69 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.16 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.70 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.58 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.25 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.32 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.28 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.81 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.25 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.34 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.54 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 26.07 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPS7.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.66 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 19.92 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.64 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.09 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.51 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.84 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 25.05 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.02 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 27.09 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.16 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.70 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.70 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPLP2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.50 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.44 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.32 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 29.81 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 29.83 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 29.90 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 29.91 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 29.63 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 29.72 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 29.81 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 29.76 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:12<00:04, 29.82 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NOL9.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.01 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.29 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.42 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.35 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.00 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.98 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.12 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.22 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.10 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.08 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 21.28 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 23.40 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/UPF1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.76 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.24 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.00 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.86 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 24.90 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.37 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.50 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.39 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 23.96 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.88 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.32 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 24.09 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SDAD1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.77 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.13 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.61 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.59 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.28 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.70 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.54 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.16 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 23.37 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.41 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 20.95 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 20.67 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GGPS1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.83 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.09 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.89 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.32 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.10 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.02 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.93 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 20.95 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.46 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 23.53 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 25.03 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 26.33 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/WARS.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 17.12 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.41 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.69 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.11 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.03 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.61 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.18 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.20 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 23.38 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 25.12 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:05, 26.50 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 27.46 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PTBP1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.86 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.41 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.92 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.23 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.91 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.06 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 23.32 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 24.07 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.74 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 26.93 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.89 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.55 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RABGGTA.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.70 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.30 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.11 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.98 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.86 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.10 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.92 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.55 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.75 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 21.92 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.62 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.43 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DNAJC17.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.61 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.57 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.07 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.26 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.35 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 28.02 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.55 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.90 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.20 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.25 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.49 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.46 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TIMM13.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.68 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.11 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.94 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.62 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 22.95 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.57 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.52 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.45 examples/s][A
Map (num_proc=16):  81%|████████▏ | 407/500 [00:18<00:04, 21.90 examples/s][A
Map (num_proc=16):  88%|████████▊ | 438/500 [00:19<00:02, 24.02 examples/s][A
Map (num_proc=16):  94%|█████████▍| 469/500 [00:20<00:01, 25.60 examples/s][A
Map (num_proc=16): 100%|██████████| 500/500 [00:21<00:00, 23.48 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TOPBP1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 17.04 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.37 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.38 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.22 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.68 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.53 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.86 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.80 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.61 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.11 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.44 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.61 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/VPS54.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.05 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.54 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.68 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:17, 21.88 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.48 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.47 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.70 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.61 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 21.74 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 23.72 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 25.43 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 26.62 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ZNHIT3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.94 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.39 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.46 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.44 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.27 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.42 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.16 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.90 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.39 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 26.47 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 24.59 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.35 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NSL1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.84 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.26 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.82 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.21 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.09 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.37 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 23.74 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 25.38 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.90 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.90 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.57 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.78 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GET1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.95 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.52 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.83 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.72 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.64 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.27 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.60 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.36 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.01 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.34 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.65 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.77 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/FBXO5.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.00 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.37 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.66 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.54 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.33 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.52 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.25 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.89 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.37 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.67 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.87 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 30.02 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GIT2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.05 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.14 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.71 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.74 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.11 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.48 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.95 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.57 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.40 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.64 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.74 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 25.31 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CLP1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.74 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.36 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 19.57 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 20.18 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.48 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.94 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.89 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.23 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 24.10 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 23.07 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.34 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 24.25 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/BRIX1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.59 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.25 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.90 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.71 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.34 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.45 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.27 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.74 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.18 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.34 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 28.23 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.78 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/THG1L.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.10 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.49 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.06 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 24.95 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.61 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.66 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.58 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.02 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.43 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.59 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.72 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 30.04 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SRP9.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:20, 22.68 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:16, 26.76 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:14, 28.39 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:14, 25.20 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:15, 22.20 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.97 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 21.85 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.88 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.49 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.57 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.50 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 24.14 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SUPT5H.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.29 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.50 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.91 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 24.88 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.49 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.78 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.28 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.82 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 27.79 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 28.57 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 26.06 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:05, 24.41 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NDUFA2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.63 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 19.98 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.63 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.74 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 24.84 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.41 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.26 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.30 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.80 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.84 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.17 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.25 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NPLOC4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.98 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.47 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.93 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.34 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.64 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.68 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.82 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.53 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.67 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.45 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.70 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.68 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CLTC.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.05 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.79 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.70 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.18 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.68 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.85 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.63 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.73 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.81 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.53 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.73 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.84 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TIPIN.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.70 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.57 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.14 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.06 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.77 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.95 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.75 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.24 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.63 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.97 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 30.01 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 30.14 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TSEN2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.10 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.41 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.88 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.17 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.78 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.09 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.64 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 27.10 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.14 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.84 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.29 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.61 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TTC1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.00 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.46 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.04 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.80 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.29 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.78 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.37 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 27.44 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.27 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.73 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.14 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 26.29 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GPS1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.88 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.30 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.19 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.20 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.13 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.33 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.85 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.16 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:10, 21.62 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.44 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.29 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.17 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ATP6V1G1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.07 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.35 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.65 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.02 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.83 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.01 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.90 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 27.19 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.06 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.62 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.11 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.47 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DDX3X.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.79 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.16 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.94 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.77 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.47 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.04 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 23.13 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.25 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.21 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.76 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.90 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.80 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SPRTN.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.80 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.18 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.57 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.41 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.17 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 23.95 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.82 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.02 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.84 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 26.33 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.39 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.90 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPL18.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.57 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.36 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.74 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.08 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.33 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.73 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 23.04 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.30 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 21.95 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.76 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.83 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 25.46 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DDX21.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.84 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.33 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 23.26 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.95 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.27 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 28.14 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.74 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.07 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.39 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.75 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 26.06 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:05, 24.13 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RABGGTB.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.93 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.52 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.57 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.61 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.32 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.52 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.47 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 27.66 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.45 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 28.90 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.30 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.58 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CFL1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.74 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.16 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 23.98 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 23.00 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 22.11 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.54 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.56 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.06 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.13 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 27.99 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.42 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.90 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CTU2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.01 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.37 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.58 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.53 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.18 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.40 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.39 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.75 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 24.02 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 23.11 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.39 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 22.59 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DDX51.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.94 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.33 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.04 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.31 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.16 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.85 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 22.82 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.22 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 21.78 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.62 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 21.36 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.41 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/FAU.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.03 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.41 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.59 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.55 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.23 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.45 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.26 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.78 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 23.94 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.85 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.23 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 22.22 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPL41.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.21 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.18 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.83 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.49 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.91 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.71 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.84 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.92 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.57 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.10 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.32 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.55 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SUMO2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.98 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.45 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 22.94 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.85 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 22.20 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.55 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.46 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.41 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.53 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 23.54 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 25.14 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 26.32 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NEMF.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.05 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.64 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.14 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:16, 21.98 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:13, 24.41 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 26.14 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.47 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.33 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.99 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.35 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.30 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.91 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NAF1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.93 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.13 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.53 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.22 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.71 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.81 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 22.23 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.74 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 22.05 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 24.00 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 25.60 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 26.85 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ATP6AP2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 17.02 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.43 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.87 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.96 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.93 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.16 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.13 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.67 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.11 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.45 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.81 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.83 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MEPCE.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.94 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:18, 23.53 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 23.12 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.55 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.91 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.60 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.43 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:12, 20.43 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.91 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 23.01 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 24.85 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 26.33 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PPP1R2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.66 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:18, 23.62 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 22.65 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.23 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.83 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.14 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.82 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 27.06 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.89 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.47 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 25.89 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 24.00 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NUMA1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.04 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.37 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.98 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.49 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.97 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.69 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.94 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.76 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.37 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.87 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 26.08 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 24.21 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/HMGA1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.90 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 20.81 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.47 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.60 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 22.52 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 22.11 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.65 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.58 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:10, 21.34 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.34 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.20 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.85 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SMC2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.87 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.24 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.85 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.07 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.05 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.93 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.00 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.04 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 21.71 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 23.78 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.15 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 22.47 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SRSF11.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:23, 19.82 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:17, 25.26 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:14, 27.41 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:13, 28.61 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 29.20 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.24 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:11, 24.31 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.46 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 25.21 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 26.61 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 27.60 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 25.39 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/U2SURP.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.72 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.28 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.67 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.93 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.80 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.80 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.55 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 22.75 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.67 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.99 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.10 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.09 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/POLR2H.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.92 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.48 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.00 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.24 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.63 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 23.90 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.63 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.92 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.01 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 23.57 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.76 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.17 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DHODH.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.66 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.22 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.92 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.41 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.16 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.08 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.94 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 20.91 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.88 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.42 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.47 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 25.22 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MRPS6.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.79 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.42 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.33 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.61 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.60 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.11 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 23.11 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.19 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 21.72 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 22.13 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.84 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.54 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPS23.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.25 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.16 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.69 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.14 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.36 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.69 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.38 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.72 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.69 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.35 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 27.69 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.36 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ANAPC4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.95 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.36 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 23.99 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.35 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.71 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.49 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 29.11 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.41 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.72 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.92 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 30.00 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 26.85 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RCC1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 17.25 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 19.95 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.02 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.21 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.60 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.46 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 29.02 examples/s][A
IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)


Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:1

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MRPL50.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.88 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.35 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.86 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.71 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.41 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.54 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 26.96 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 27.95 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.65 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.04 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.45 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.73 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PHB2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.92 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.28 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.88 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.14 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.16 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.41 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 25.27 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.69 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.68 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.44 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.82 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 25.81 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TAF2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.65 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.38 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.56 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.24 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 29.90 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:11, 26.17 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:11, 24.08 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:10, 23.05 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 22.48 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 24.37 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 25.91 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 27.02 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/OPA1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.94 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.45 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.42 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.30 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.38 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.85 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.87 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.62 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.11 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.44 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.65 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.80 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RARS2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.02 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.19 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.96 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.02 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.84 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.22 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.12 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.72 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 26.07 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 24.42 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.65 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 24.60 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NCBP1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.76 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.33 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.45 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.29 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.04 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.11 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 30.11 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 30.06 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 30.19 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 30.18 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 29.94 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:12<00:04, 30.00 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RBBP8.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.13 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.05 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.51 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.61 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.64 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.02 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 27.96 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.62 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.10 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.28 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.64 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.78 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DHX36.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.86 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.18 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.91 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.32 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.79 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.58 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.21 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.08 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.41 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.04 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.08 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.32 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TAF7.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.95 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.40 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.84 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.23 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.16 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.07 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.05 examples/s][A
Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.47 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.82 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.75 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.18 examples/s][

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NDUFB8.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.59 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.03 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.77 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.04 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.99 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.41 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 23.67 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 25.36 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.72 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.62 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 28.32 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.81 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PNKP.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.01 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.56 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.01 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.71 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.28 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.21 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.18 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:12, 20.45 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.66 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:15<00:08, 21.14 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:06, 23.30 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:04, 25.11 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/QARS.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.67 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.08 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.63 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.95 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.96 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.93 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 23.09 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 24.92 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.33 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.32 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 25.19 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 24.09 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPL18A.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.89 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 20.78 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.46 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.86 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.94 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.67 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 29.28 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.59 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.67 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 30.01 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 30.09 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 26.96 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPS15A.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.89 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.40 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.82 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.52 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.27 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.94 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.38 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 27.35 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.17 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 25.79 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.20 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.07 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/INTS4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.99 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.64 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.38 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.95 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 22.43 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.66 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.65 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.22 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 24.42 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 23.29 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.28 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.64 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NOB1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.85 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 19.92 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.45 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.04 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.34 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.55 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.36 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.82 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.66 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.53 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.90 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.41 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PIK3R4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.00 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.49 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.03 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.09 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.60 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.57 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.02 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.97 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.64 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.15 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.48 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.43 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MRPS14.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.84 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.24 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.99 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.18 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.79 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 23.99 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.73 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 27.04 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.98 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.74 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.85 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.14 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MRPL14.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.54 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.25 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.04 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.25 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.37 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 28.07 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.36 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.87 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.14 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.36 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.49 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.50 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NUTF2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.59 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:23, 18.87 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.63 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.90 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.82 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:15, 20.65 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.58 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:12, 20.65 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.03 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 23.06 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 24.78 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 26.07 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NLE1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.74 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.02 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.66 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.02 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.83 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.00 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.62 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.88 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.85 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 27.31 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.00 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 25.61 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/POLG2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.00 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.39 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.69 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.59 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.33 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.55 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.42 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.63 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 23.94 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 23.77 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 25.30 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.66 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/UBL5.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 16.83 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.08 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 20.11 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.87 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.23 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.10 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.60 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 22.68 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 21.92 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.61 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.50 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.29 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/JMJD6.dataset


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.60 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.63 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.32 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 29.77 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 29.88 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 29.90 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 29.98 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 28.54 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:08, 25.92 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:07, 24.28 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 23.12 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:05, 22.44 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/WBP1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.94 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.42 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.27 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.39 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.09 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.04 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.74 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.87 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.41 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 26.44 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.53 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.05 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TELO2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.59 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.16 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.76 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.08 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.08 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.67 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 23.82 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 25.54 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.72 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.68 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 28.26 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 29.02 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CLSPN.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.76 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.45 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.99 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.07 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.11 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.83 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.96 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 24.62 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.12 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.29 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 28.09 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.77 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NOM1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.08 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.17 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.37 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.00 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.04 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 29.94 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 29.90 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 29.94 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 29.98 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 29.96 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 29.95 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:12<00:04, 29.89 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NIFK.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:17, 26.31 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.97 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.85 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:14, 25.90 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:14, 23.89 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:13, 22.89 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 23.06 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.02 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 26.35 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 27.42 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.27 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.83 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TKT.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.01 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:18, 23.76 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:15, 26.58 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:13, 28.25 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.40 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.19 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.82 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.22 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:08, 26.47 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 27.42 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 28.33 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 28.91 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CHCHD1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.76 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.22 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.82 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.43 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.87 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.64 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.02 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.04 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.66 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.09 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.37 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.61 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/EIF2S2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.97 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.36 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.89 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.08 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.10 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.15 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.12 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.72 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.17 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.13 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 21.09 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.06 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/VPS37A.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.82 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:18, 23.35 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 22.49 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:17, 21.85 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.63 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 22.05 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.19 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.78 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 26.96 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 27.87 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.33 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.84 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CIAO1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.97 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.56 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.76 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.59 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.45 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.66 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.29 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.31 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.05 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 20.72 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 20.73 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:06, 20.64 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/USPL1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.00 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.28 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.04 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.28 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:13, 24.52 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 26.22 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.42 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.25 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.80 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.20 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.51 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.74 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MCL1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.70 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.37 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.81 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.21 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.14 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.07 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.11 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.10 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.32 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.10 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 21.13 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.34 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ESF1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.50 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:18, 23.50 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 22.66 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:17, 21.87 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:14, 24.24 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 25.96 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.24 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.14 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.72 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.34 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.65 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.73 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TRRAP.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.60 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.36 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.31 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.28 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.96 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.06 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.00 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.61 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.04 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.31 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.55 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 25.61 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TINF2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.84 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.36 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.96 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.81 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.24 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 25.96 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.15 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.96 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.65 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.12 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 26.37 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 24.37 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PPP4R2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.55 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:16, 26.09 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:14, 27.90 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 28.98 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 29.29 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 29.44 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 29.57 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:09, 26.04 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:08, 24.22 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:08, 23.18 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 24.97 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 26.34 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TTK.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.07 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 21.22 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.98 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 27.12 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 28.19 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.87 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 29.41 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.76 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.59 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.10 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.47 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.71 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/LSM12.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.66 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.17 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.77 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.77 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.43 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.42 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.35 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.92 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.30 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.52 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.71 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.83 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RBM19.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.87 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 20.85 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.57 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.85 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.95 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.43 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 25.78 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 24.13 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 23.06 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.57 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.45 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 25.83 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SEC13.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.73 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 19.85 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.70 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.38 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.76 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.69 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.98 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.95 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 21.81 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.63 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.49 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.87 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/EIF4A1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.45 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:18, 23.57 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 22.77 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.20 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.80 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.52 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.51 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.47 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:10, 21.24 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.24 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.51 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 25.13 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NDUFS3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.16 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.44 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.40 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.42 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.16 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.46 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.43 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.99 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.33 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.51 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.80 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.92 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MICOS10.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.99 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.29 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.12 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.59 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 22.92 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.06 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.61 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.64 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.34 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.91 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.43 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.51 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SSBP3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.82 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.26 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.68 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.41 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.35 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.15 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.10 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.18 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.93 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:15<00:08, 20.87 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 21.17 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.26 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PRKRA.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.88 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.39 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.81 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.07 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.24 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.54 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 25.30 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.54 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.56 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.36 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.88 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.22 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ATP6V1D.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.95 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.04 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.95 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.44 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.34 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.22 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.31 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.49 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.20 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 26.50 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.35 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.20 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MRPS10.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.41 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.31 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.68 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.16 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 29.97 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:11, 26.10 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:11, 24.02 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:10, 23.07 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:10, 21.66 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:08, 21.66 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 23.63 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 25.41 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPL11.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.14 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.48 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.55 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 29.99 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 29.98 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 29.96 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 29.86 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 30.12 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:08, 26.77 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:07, 24.68 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:06, 23.25 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:05, 23.06 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/IMPDH2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.64 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.26 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.25 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.18 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.02 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.79 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.85 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:12, 20.57 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.56 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:15<00:09, 20.65 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 20.93 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:18<00:05, 20.84 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NPAT.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.97 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.34 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.86 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:16, 21.96 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.24 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 25.94 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.90 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 27.00 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.84 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.56 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.96 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 26.18 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DHX15.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.68 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.35 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.85 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.71 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.40 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.56 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.35 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.92 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.20 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.47 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.73 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.92 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TAF8.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 30.00 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 30.08 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 30.00 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 30.12 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:12<00:04, 30.10 examples/s][A
Map (num_proc=16):  81%|████████▏ | 407/500 [00:13<00:03, 29.81 examples/s][A
Map (num_proc=16):  88%|████████▊ | 438/500 [00:14<00:02, 28.44 examples/s][A
Map (num_proc=16):  94%|█████████▍| 469/500 [00:15<00:01, 28.78 examples/s][A
Map (num_proc=16): 100%|██████████| 500/500 [00:17<00:00, 29.27 examples/s][A

Saving the dataset (0/1 shards):   0%|          | 0/500 [00:00<?, ? examples/s][A
Saving the dataset (1/1 shards): 100%|██████████| 500/500 [00:00<00:00, 28499.33 examples/s][A
  for i in adata.var["ensembl_id"][codi

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SCNM1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.80 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 19.95 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.55 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.37 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.76 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.55 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.80 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.63 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.36 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.79 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.18 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.33 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ANKLE2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.66 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.25 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.85 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.74 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.96 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.00 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.14 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.29 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 23.48 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 25.21 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:05, 26.51 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 27.46 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DDX49.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.87 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.17 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.62 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.89 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.95 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.85 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.92 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.55 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 23.60 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 25.25 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:05, 26.56 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 26.37 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MZF1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.87 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.38 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.87 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.06 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.15 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.47 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 25.26 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.96 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.84 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 22.04 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.72 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.46 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CHERP.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.94 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.23 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.28 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 20.32 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.60 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.69 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.40 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.58 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 22.61 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 22.20 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 24.16 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 25.62 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NUP153.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.09 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.43 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.92 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.15 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.15 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.14 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.34 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.34 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.17 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.18 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 20.40 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:18<00:06, 20.58 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MCM7.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.78 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.25 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.85 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.04 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.87 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.88 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.31 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.41 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.21 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.35 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.41 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 25.12 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TIMM44.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 17.07 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.50 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.40 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.78 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.80 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.98 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.68 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.83 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.49 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 26.63 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.73 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.45 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PSMD8.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.80 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:18, 23.62 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 22.75 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.31 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 22.18 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.39 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.58 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.28 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.46 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 25.11 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.61 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 22.76 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPL36AL.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.90 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.35 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.37 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 29.78 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 29.56 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 29.65 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 29.54 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 29.60 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 29.56 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 29.55 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 29.63 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:12<00:04, 29.51 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SARNP.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.79 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.30 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.88 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.11 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.58 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.55 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.30 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.01 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.97 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.56 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.84 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 24.71 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/UBE2D3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.97 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.19 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.82 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.56 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.95 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.74 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.79 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.88 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.89 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.67 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.02 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.39 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DHX33.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.92 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.40 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.92 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:16, 22.03 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.31 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 26.03 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.20 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.06 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.39 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.74 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.64 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.10 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ABCE1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.77 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.33 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.85 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.23 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.21 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.49 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 25.23 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.66 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.72 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.87 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.62 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.94 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RTTN.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:26, 17.37 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.06 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 19.58 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 19.95 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:08<00:18, 18.92 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.51 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 23.69 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 25.35 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.63 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.54 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 28.29 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.73 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MRPS22.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:26, 17.92 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.33 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 19.66 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 20.23 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.52 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:15, 20.67 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.10 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.28 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.11 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 23.89 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.36 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.58 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/BMS1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.02 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.49 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.03 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.68 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.45 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.19 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.21 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.13 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.15 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.46 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 21.23 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.15 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DLD.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.29 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.70 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:14, 27.78 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 28.98 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 29.28 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 29.73 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 29.98 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:09, 26.67 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:08, 24.62 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:07, 23.85 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:06, 25.34 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 26.70 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PFDN5.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.66 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.42 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.05 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.35 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 24.57 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.64 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.78 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.20 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.28 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.32 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.85 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.68 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/FAM50A.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.81 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.40 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.90 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.37 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.78 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.74 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.99 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 24.81 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.64 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.49 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.00 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.03 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NDUFA4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 17.05 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.52 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.51 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.77 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.80 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.17 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.26 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.83 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.24 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.56 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.84 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.97 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SNAPC4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.98 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.28 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.96 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.15 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.12 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.02 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.15 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:12, 20.43 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 22.81 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 24.68 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:05, 26.18 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 27.34 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MRPL28.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.87 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.36 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.94 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.26 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.21 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.33 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.22 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.18 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 22.41 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 24.27 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 25.82 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 26.94 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RRP9.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.67 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.51 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.17 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.36 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.14 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.39 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.36 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.02 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.42 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.64 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.80 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.93 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DMAP1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.80 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.66 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.34 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.52 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.25 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 24.35 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 23.02 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:11, 22.30 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:10, 21.67 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 21.72 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.67 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 25.25 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ATP6V0C.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.07 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.09 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.69 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.92 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.77 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.85 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 22.93 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 24.81 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.83 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 22.86 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.37 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.81 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ARIH1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.66 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.10 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.65 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.94 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.26 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.49 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 25.23 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.53 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.47 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 27.94 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.44 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.82 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/E4F1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 16.98 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.44 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.73 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.08 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.97 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.48 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.27 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.17 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.15 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.77 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.88 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 25.57 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPS29.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.95 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.48 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.08 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.25 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.80 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.00 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.81 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.17 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.35 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 27.99 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.76 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.12 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TAF1B.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.84 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.38 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.95 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.15 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.75 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.00 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.78 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 27.12 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.95 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.71 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.14 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.49 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ADSL.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.95 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.40 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.53 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.52 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.17 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.49 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.40 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.05 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.45 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.48 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 26.39 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:05, 24.48 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/XRCC1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.72 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 21.12 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.77 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.78 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 24.73 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.72 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 21.72 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.49 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:10, 21.36 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.65 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.66 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 25.43 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NUDCD3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.89 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.42 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.61 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.88 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.82 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.88 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.07 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.33 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.14 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 26.48 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.52 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.30 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NOP16.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.80 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.45 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.38 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 29.84 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 29.89 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 29.97 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 29.94 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 29.93 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 29.69 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:07, 26.55 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:06, 24.58 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:05, 23.77 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MOCS3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.73 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.24 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.90 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 20.52 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.03 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.02 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.52 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 27.55 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.31 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.82 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.25 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 26.01 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DDX52.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.93 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.48 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.35 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.74 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.82 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.71 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 29.29 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.65 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:08, 26.27 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 24.42 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 23.10 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 24.83 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SLBP.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.80 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.38 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.92 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.44 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.83 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.67 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.06 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 22.96 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.21 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.31 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 24.20 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 25.65 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ELP2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.69 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.07 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.01 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.02 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.73 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.92 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.90 examples/s][A
Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.03 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.63 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.40 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.39 examples/s][

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GTF3C3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.61 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.24 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.08 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.13 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.79 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.00 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.86 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.57 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.09 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 24.87 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.50 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.27 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/POLA1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.79 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.61 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.08 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.41 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.53 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.59 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.43 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.32 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.17 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.17 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 21.17 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.13 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GAK.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.12 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.52 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.03 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.32 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.28 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.62 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.93 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.73 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.99 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.89 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 28.60 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 29.11 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/HAUS1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.48 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:18, 23.26 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 22.72 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.44 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.96 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:15, 20.63 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.90 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 20.74 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.15 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.18 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 21.08 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.05 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SKA3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.83 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.47 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.79 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.13 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.27 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.54 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.38 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.68 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.71 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 22.26 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 24.24 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 25.70 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/WDR5.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.94 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.55 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.89 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.04 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 22.07 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.28 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.97 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 27.25 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.14 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.68 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.13 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.49 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NFRKB.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.95 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.46 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.70 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.65 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.38 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.14 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 23.27 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 25.11 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.50 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.53 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 28.45 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.92 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MRPS12.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:17, 26.29 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.95 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.89 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:14, 26.11 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:14, 23.88 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:13, 23.07 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:11, 25.10 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:09, 26.49 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 24.36 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 23.32 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.96 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 24.71 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PSMG1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.96 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.41 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.03 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.31 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.33 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.72 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.53 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.92 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.12 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 23.68 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.78 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.01 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NDUFAB1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.57 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.34 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.92 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.17 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.11 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:15, 20.12 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.47 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:12, 20.61 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.76 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:15<00:08, 20.82 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 21.08 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 23.27 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DDOST.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.47 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.26 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.42 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.39 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.18 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.46 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.32 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 22.56 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.50 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 23.41 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.64 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.09 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TIMM8A.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.77 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.35 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.04 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.66 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.99 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 25.93 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.23 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.18 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 25.85 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.32 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.22 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.24 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DYNLRB1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.67 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.14 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.88 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.95 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.02 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.16 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.98 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 20.95 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.94 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:15<00:08, 21.24 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:06, 23.35 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:04, 25.08 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MT2A.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.77 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.41 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.52 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.51 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.24 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.45 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.27 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.89 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 26.03 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 24.45 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.66 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 24.32 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/C7orf26.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.34 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.14 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.89 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.77 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.46 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.60 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.37 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.86 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.32 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.61 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.74 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 26.77 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CCNL1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 16.91 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.39 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 20.18 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.77 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.69 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.85 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.73 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 20.88 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.02 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:15<00:08, 20.97 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 21.40 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:18<00:05, 21.48 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/EXOSC6.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.96 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.39 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.83 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.17 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.38 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.93 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.30 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.09 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.39 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.98 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.05 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.87 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/WBP11.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.80 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.21 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.27 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.18 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.98 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.13 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 27.99 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.44 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.92 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.19 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.49 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.70 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/LSM4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.05 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.43 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.40 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.57 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.77 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.49 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.97 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:09, 25.86 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 23.98 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 23.50 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 25.21 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 26.43 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CENPJ.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.06 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 19.99 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.75 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.82 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.07 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.51 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 25.30 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.56 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 26.83 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 27.84 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.48 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.92 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/YRDC.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.93 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.47 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.52 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.58 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.50 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.59 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.71 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 24.61 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 26.24 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 27.32 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.19 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.80 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RRM1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.75 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.28 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 23.92 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.27 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.43 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.27 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.80 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.16 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.40 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:07, 26.29 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 24.41 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:05, 23.18 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ZNF24.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.77 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.37 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.44 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.78 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.53 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.79 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.76 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 20.94 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 23.05 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 24.97 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:05, 26.34 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 27.43 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TRMT112.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.78 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.17 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.47 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.53 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.29 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.12 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.20 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.02 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.89 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.54 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.01 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.00 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPL39.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.51 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.24 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.79 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.09 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.37 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.28 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.24 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.24 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.13 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.07 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 20.98 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.22 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/XPO1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:20, 22.49 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:16, 26.73 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:14, 28.45 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 29.52 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.06 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 24.03 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:12, 22.91 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:11, 22.11 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 21.91 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 23.84 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 25.58 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.71 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/LTV1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.97 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.51 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.73 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.80 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.45 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.71 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.52 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 22.61 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:10, 21.23 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.35 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.37 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.84 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GTF2H2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.95 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.47 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.08 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.47 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.94 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.80 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.11 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.11 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.79 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.33 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.78 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.89 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SS18L2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.44 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.16 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.49 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:14, 26.46 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:14, 24.23 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:13, 22.93 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.13 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:11, 21.88 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.21 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 21.88 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:07, 21.70 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.98 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SIN3A.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.96 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.33 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.20 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.52 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.18 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.20 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.75 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.13 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.96 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 26.39 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.37 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.04 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MRPS31.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.84 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.35 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.32 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.65 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.79 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.62 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 25.92 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 24.18 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 23.25 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 23.32 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 25.12 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 26.44 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DCAF6.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.91 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.47 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.99 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.26 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.22 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.99 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.00 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 20.95 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.95 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:15<00:08, 20.97 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:06, 22.68 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 24.61 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ACTB.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.00 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.37 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.45 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 20.21 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.45 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.70 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.51 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 20.74 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.94 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:15<00:08, 21.08 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 20.99 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:18<00:05, 21.26 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SDE2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.93 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.62 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.21 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.09 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.70 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.87 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.66 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:09, 26.35 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 24.37 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 23.43 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.52 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 24.50 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/METTL1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.74 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.40 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.10 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 24.89 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.30 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.55 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.40 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.95 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 26.30 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 24.58 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 23.35 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.28 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CHMP3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.77 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.39 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.12 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 24.95 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.59 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.75 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.58 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.14 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 25.82 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 24.16 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 23.93 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 25.59 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RNF168.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.12 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.54 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.09 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.07 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.05 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.34 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 26.91 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 24.99 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 23.66 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.87 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.43 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.03 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MRPL9.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.72 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.41 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.05 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.12 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.02 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.30 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 25.20 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.73 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.85 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.08 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 22.05 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.97 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SRRM1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.95 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.27 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.03 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.38 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.93 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.61 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.94 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.81 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.46 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.59 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.87 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 25.83 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/AAAS.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.90 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.35 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.90 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.26 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 22.76 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.79 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.40 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 27.52 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.16 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.95 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.21 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.69 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CEP85.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.81 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.17 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.83 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.01 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.82 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.85 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.23 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.33 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.12 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 26.52 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.47 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 24.43 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/UBA5.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.09 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:19, 22.84 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.53 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:17, 21.42 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.22 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 20.98 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.00 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.07 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.88 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 20.84 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 20.73 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 20.80 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PHF12.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.77 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.03 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 20.16 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.72 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.40 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.74 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 25.35 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.77 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.68 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.33 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.93 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.39 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MEX3A.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.67 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.17 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.30 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.44 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.17 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.29 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 25.17 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.51 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.60 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.38 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.26 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 25.79 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/HMGCR.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.99 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.47 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.79 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.30 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.30 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.22 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 30.24 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:09, 26.52 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:08, 24.57 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:07, 24.17 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:06, 25.68 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 26.78 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TPX2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.98 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.37 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.94 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.74 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.13 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 25.89 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.13 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.79 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.78 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 25.52 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.99 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 22.93 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/URI1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.92 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.37 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.55 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.56 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.32 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.34 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.33 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.86 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.34 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 26.45 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 24.59 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.28 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/AASDHPPT.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.69 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.27 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.49 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.38 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.11 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.34 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.25 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.84 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.33 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.80 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.80 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 30.00 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SPDL1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.92 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.46 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.12 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.31 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.50 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.58 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.90 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.97 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 25.66 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.11 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.33 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 22.58 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CSNK1A1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.94 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.28 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.06 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.77 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.05 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 26.02 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.12 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.05 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.81 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.24 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.60 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.52 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPL27.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.01 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.50 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.98 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.49 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.94 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.75 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.05 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.98 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.70 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.12 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.57 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.51 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPL29.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 17.27 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.62 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.98 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.09 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.96 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.25 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.21 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.80 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.24 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.42 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.69 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.97 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPL26.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.71 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.76 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.49 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.70 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.79 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.46 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:11, 24.51 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.29 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 22.64 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 24.45 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 25.96 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 26.98 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/POP1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.59 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.16 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.75 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.19 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 22.16 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.25 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.03 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 24.52 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.31 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.82 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.58 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.53 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/EXOC2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.86 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.31 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.87 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.27 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.25 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.10 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.24 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.55 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.36 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 26.67 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.72 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.48 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RBBP4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.58 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.07 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.98 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.99 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 24.63 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.17 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.34 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.14 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.78 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.07 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.31 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.57 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ANAPC5.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:23, 20.34 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:17, 25.48 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:14, 27.48 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:13, 28.60 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 29.16 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 29.42 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 29.29 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 29.60 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 29.76 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 29.86 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 29.73 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 26.92 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/EARS2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.86 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.27 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.63 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.01 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.87 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.89 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.23 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.34 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.11 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 26.39 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.29 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.19 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TSEN54.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.00 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.46 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 20.03 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 20.62 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.18 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.56 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 25.35 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.75 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.80 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.54 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.01 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.41 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RAB6A.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.59 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.15 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.73 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.68 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.37 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.31 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.18 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.33 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.04 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 26.25 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.34 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.42 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ZC3H8.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.11 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.51 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.82 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.03 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.06 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.69 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 23.89 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 25.62 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.85 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.79 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 28.51 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 29.05 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PES1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:02<00:29, 15.65 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:23, 18.62 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:05<00:20, 19.96 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.82 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.34 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.34 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.67 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.71 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.48 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.97 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 26.85 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 24.71 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/THOC1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.74 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.13 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.78 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.95 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.08 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.43 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 22.40 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.78 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.24 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 23.25 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 25.00 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.72 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SNUPN.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.68 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.18 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.60 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.84 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.64 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 23.86 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.50 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.77 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.45 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.06 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 25.46 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.77 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/QRSL1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.63 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.78 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.77 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.47 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.36 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.30 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 30.29 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 30.31 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 30.36 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 30.36 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 26.96 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 24.91 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SKP2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.88 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.41 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.14 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.16 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.02 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 25.91 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.27 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.07 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.77 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.06 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.52 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.79 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PPP6C.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.83 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.42 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.05 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.21 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.17 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.18 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.50 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.69 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.21 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 26.60 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.64 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.37 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GTPBP4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.05 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.57 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.34 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.39 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.26 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.52 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.40 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.01 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.47 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.72 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.85 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 30.05 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RBM28.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.12 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 19.97 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 23.91 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.92 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 22.14 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.75 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.94 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 24.02 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.70 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 26.93 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.74 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.51 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/EIF3D.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.70 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.50 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.59 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.30 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:13, 25.99 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 23.96 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:11, 23.34 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:09, 25.14 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 23.60 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:08, 21.71 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:07, 21.34 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 21.27 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CCT3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.51 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.32 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.74 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.14 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.95 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.88 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.93 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.19 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 23.28 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 25.04 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:05, 26.29 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 27.36 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GTF2F2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 16.82 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.39 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.23 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.59 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.88 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.70 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.96 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 22.94 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.32 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.89 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 22.02 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 24.02 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/EEF1G.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.97 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.37 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.88 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.91 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.84 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.91 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.08 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.32 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.11 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.76 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.91 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.77 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SCAP.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.73 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.47 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.94 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.17 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.62 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 23.91 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.69 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 27.01 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.98 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.61 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.21 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.56 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DYNC1H1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.87 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.32 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.01 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.54 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.92 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.70 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.79 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 22.68 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.15 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.49 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.42 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.68 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SLC39A10.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.04 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.35 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.97 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.18 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.14 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.78 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.78 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.25 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.21 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.90 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.44 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.28 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SMC1A.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.91 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.52 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.36 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.77 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.62 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.27 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.28 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.47 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.62 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 23.07 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.34 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.49 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/VPS33A.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.93 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.18 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.88 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.14 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.01 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.18 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 23.34 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 25.10 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.46 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.53 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 25.23 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.78 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/KIF11.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 16.81 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.48 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 23.35 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.81 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.72 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.75 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.66 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.15 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 21.85 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 23.78 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 25.42 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.57 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NSF.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.54 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.27 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.93 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:16, 21.92 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.17 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 25.96 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.32 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.08 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 24.49 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 23.04 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.34 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.89 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TEFM.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.89 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.55 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.16 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:16, 21.91 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.31 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 26.06 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.37 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.22 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.85 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.23 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.64 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.85 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GNL3L.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 24.72 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.15 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.36 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.07 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.00 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 29.86 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 29.99 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 30.02 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 29.90 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 26.88 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:06, 24.72 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:05, 23.36 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/EXOC7.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.62 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.06 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.80 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.96 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.73 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 25.72 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.95 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.76 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 25.43 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 23.89 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.84 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 24.64 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NUBP2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.04 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.35 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.98 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.37 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.90 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.64 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.21 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.08 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.48 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.91 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.29 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.44 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/COPS4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.83 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.38 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.92 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.31 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.78 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.65 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.01 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.01 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.54 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.98 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.34 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.61 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PELO.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.64 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.12 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.74 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 19.99 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.25 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.02 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 23.24 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 25.03 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.25 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.29 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 28.03 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.57 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CCNK.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.70 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.20 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.79 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.48 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.43 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.26 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.52 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.07 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.02 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.51 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.61 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 24.25 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MCM10.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.96 examples/s][A

Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.47 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.82 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.77 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.59 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.81 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 25.92 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.11 examples/s][A
Map (num_proc=16):  81%|████████▏ | 407/500 [00:17<00:03, 25.73 examples/s][A
Map (num_proc=16):  88%|████████▊ | 438/500 [00:18<00:02, 24.21 examples/s][A
Map (num_proc=16):  94%|█████████▍| 469/500 [00:20<00:01, 23.36 examples/s][A
Map (num_proc=16): 100%|██████████| 500/500 [00:21<00:00, 23.

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ZMAT2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.04 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.40 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.08 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.07 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.54 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.30 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.32 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:12, 20.43 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.77 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 20.99 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 21.05 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.12 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/LUC7L3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.70 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.19 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.11 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.38 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.65 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.32 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.79 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:09, 25.83 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 24.07 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:08, 23.05 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.32 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 21.75 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/UFM1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.14 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.53 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.11 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.78 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.24 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 25.94 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.23 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.85 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.66 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 25.72 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.21 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 24.47 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PPIA.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.83 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.36 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.96 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.43 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.61 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:15, 20.56 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.50 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.64 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.36 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 26.75 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.81 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.55 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ARF4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.79 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.32 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.18 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.21 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.66 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.21 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 29.01 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.28 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:08, 25.96 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 24.12 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 23.18 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 25.03 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RBM39.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.94 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.17 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.82 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.09 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.22 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.35 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 25.35 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.71 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.57 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.44 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.01 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.37 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/WDR46.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.80 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:23, 18.93 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.32 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.50 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.55 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.98 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.05 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.64 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 23.95 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.93 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.09 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 24.88 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TPR.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.42 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.04 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 23.97 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.11 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.30 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 24.94 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.36 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:11, 22.44 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:10, 21.61 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 21.44 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.13 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 20.89 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PCBP2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.78 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.40 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.87 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.26 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:13, 24.48 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 26.16 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.33 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.27 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.90 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.33 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.55 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.85 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ZNF720.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.86 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.29 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.10 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.53 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.41 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.27 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.45 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.41 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.22 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.22 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.41 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 25.14 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SNRPD1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.86 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.54 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.08 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.29 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.75 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.00 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.76 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.13 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 27.11 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 27.99 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.57 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.22 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPL28.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.96 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.38 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.24 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.25 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.02 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.20 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.04 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.67 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 26.11 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 24.22 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.12 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 22.41 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/WDR82.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.91 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.31 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.79 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.10 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.53 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.73 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.47 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.76 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.75 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.44 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.77 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.26 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ARPC3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.97 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.54 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.36 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.66 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.41 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.62 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.48 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.01 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.44 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.76 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.95 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.96 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/STIL.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.06 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.53 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.66 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.66 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.46 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.54 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.77 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.05 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.26 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.97 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.17 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.27 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPS5.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.65 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 19.82 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.39 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.32 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.70 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.49 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.89 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 22.79 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.08 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.79 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.11 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 24.38 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RBM14.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.84 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.35 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 19.72 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 23.00 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.16 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.68 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.72 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.42 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.15 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.83 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.96 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 25.88 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/POP4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.58 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.39 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.53 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.49 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.55 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.66 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.23 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.61 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:10, 21.43 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.02 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.17 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 24.84 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TMA16.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.91 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.62 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.72 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.32 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.12 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.04 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 29.92 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 30.09 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 30.00 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 29.82 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 29.86 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:12<00:04, 29.79 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RAD17.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:22, 20.36 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:17, 25.39 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:14, 27.59 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 28.95 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 29.50 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 29.61 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 29.77 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:09, 26.80 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:08, 24.61 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 23.38 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 23.68 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 25.43 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPL17.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.62 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.84 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.98 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:14, 26.50 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 24.37 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:13, 23.15 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:11, 25.12 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:09, 25.91 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:08, 26.79 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 27.74 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 28.64 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 26.24 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SUPT4H1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.78 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 21.23 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 25.02 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 27.09 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 28.06 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.57 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 29.03 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.37 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.65 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 26.63 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 24.68 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:05, 23.22 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/LSM6.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.74 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.48 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 22.85 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.58 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:14, 23.50 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.51 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.20 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.90 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.82 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.42 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.76 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.85 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PAF1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.89 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.65 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.72 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.52 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.73 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.26 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 23.57 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 25.38 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.65 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.70 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 28.27 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 29.04 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PSMB5.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.18 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.65 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.80 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.34 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.41 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.30 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 30.35 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 30.51 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 30.39 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 30.38 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 30.39 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:12<00:04, 30.47 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DIS3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.80 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.38 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.10 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.39 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.44 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.27 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 24.85 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.49 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 21.60 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.36 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:10, 21.63 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 23.73 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/EIF3L.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.09 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.47 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.02 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.31 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.20 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.59 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.43 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.80 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.81 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.62 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.03 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.97 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NCAPD2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.59 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.56 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.60 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.12 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.05 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.09 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 29.94 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 29.95 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 30.04 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 30.02 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 30.08 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:12<00:04, 30.14 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SLC39A7.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.83 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 19.99 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.79 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.96 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.09 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.79 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 23.16 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 24.99 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.59 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.76 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.55 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.69 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NR2C2AP.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.88 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.41 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.09 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.36 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.32 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.18 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.46 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.68 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.46 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 26.75 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.92 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.64 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/C9orf16.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.06 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.50 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.26 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 24.89 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.53 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 24.80 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.46 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 22.70 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.02 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 21.77 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.68 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.09 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/INTS9.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.84 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 18.98 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 20.11 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 20.66 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.72 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.76 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.90 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 20.90 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.87 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:15<00:08, 20.82 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 20.91 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:18<00:05, 21.06 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/UQCRFS1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.85 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.41 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.96 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.09 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.11 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.79 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.61 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:12, 20.64 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.85 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:15<00:09, 20.59 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:06, 22.83 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 24.70 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RNMT.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.72 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.08 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.83 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.45 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.85 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.45 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.74 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.73 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.43 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.86 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.15 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.38 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PRPF8.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 16.90 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.30 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.94 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.03 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.94 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.12 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.08 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.80 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.22 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.47 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.82 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.95 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/FGFR1OP.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.70 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.28 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.10 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.11 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.47 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.72 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.80 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 27.15 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.13 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.74 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.14 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.61 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PPA1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.69 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.53 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.08 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:16, 21.96 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:13, 24.42 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 26.14 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.32 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.05 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.83 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.12 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.47 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.67 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NDUFA11.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.13 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.41 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.05 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.75 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.20 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.25 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.82 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.90 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.65 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.20 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.02 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.45 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PRPF31.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.92 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.44 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.26 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.92 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.94 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.25 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 25.23 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.90 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 23.35 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 25.20 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.09 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 22.95 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/AURKB.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.78 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.30 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.15 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.12 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.81 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.27 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.10 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.87 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.19 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.42 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.20 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.65 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/USP5.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.73 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.31 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.72 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.94 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.99 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.34 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 25.24 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.93 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.62 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 22.61 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 24.39 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 25.87 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/POLR1E.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.79 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:23, 18.88 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 19.81 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 20.51 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.61 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:15, 20.65 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.70 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 22.93 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.74 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 26.09 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:05, 27.17 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 27.88 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/METTL16.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.94 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.35 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.77 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.57 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.32 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.52 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 23.23 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.46 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:10, 21.57 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 23.64 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 25.29 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 24.05 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/KAT5.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.74 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.41 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.05 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.25 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.30 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.72 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.56 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.93 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.93 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.65 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.11 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.48 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPL8.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.93 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.50 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.09 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 23.11 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.32 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.02 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.98 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.26 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 21.87 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.81 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.93 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.34 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GTF2E1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.82 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.20 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.71 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.09 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.58 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.98 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.91 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.56 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.79 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 22.16 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.97 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.90 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/INTS5.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.82 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.35 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.60 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.97 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.08 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.33 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 25.18 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.60 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.39 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 23.43 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.32 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 25.63 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/EIF3G.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.88 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.37 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.18 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 24.93 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.72 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.81 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.69 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.36 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.63 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.83 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.95 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 30.16 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MZT1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 17.01 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.54 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.06 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.16 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.85 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.13 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.07 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.83 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.31 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 26.49 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 24.65 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.40 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MRPS18B.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.92 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.37 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.99 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.33 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.80 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.17 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.37 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 22.63 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.18 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.19 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.74 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.76 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PPP1CB.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.80 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 19.95 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.75 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.64 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.30 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.07 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.45 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.22 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 26.56 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 27.34 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.12 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.64 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CDC42.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.00 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.31 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.93 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.15 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.21 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.26 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 23.55 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 25.40 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.76 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.83 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 28.59 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 29.09 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/YEATS4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.92 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.54 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.50 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.77 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.52 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.66 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.23 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.01 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.49 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.68 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.90 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 30.01 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/E2F6.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.81 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.56 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.75 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.46 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.40 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.42 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 30.35 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 30.37 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 30.35 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 30.36 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 26.98 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 25.05 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NOP58.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.82 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.29 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.90 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.24 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.21 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.00 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.11 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.43 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 23.63 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 25.42 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:05, 26.76 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 27.81 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PSMD6.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.63 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.09 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.47 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.36 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.52 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.47 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.80 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.68 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.35 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.80 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.26 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.40 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ERH.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.80 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.35 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.66 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.59 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.24 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.46 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.24 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.93 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.26 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.48 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.74 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 28.45 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NAA20.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.92 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.15 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.07 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.00 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.86 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.20 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 25.02 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.74 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.83 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.70 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.13 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.19 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/BAP1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 16.93 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.53 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 23.41 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.03 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.40 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 24.92 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.49 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.18 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 24.96 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 26.35 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 27.17 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.29 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CPOX.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.07 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.57 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.44 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.42 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.16 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.40 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.46 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 27.67 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.51 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 25.83 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 24.21 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.32 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GINS3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.80 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.55 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.17 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.28 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.05 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.29 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.28 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.90 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.33 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 26.91 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 24.74 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.42 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TRA2B.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.73 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.87 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:15, 25.53 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.92 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:14, 22.96 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 25.08 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 26.68 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 27.73 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.60 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 28.97 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.48 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.81 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SOD1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.82 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.44 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.49 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.52 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.31 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.57 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.18 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.18 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.85 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.22 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.62 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.88 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/KPNB1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.66 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.20 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.95 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.67 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.84 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.65 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.04 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.00 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.76 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.18 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.61 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 26.15 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CDIPT.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.85 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.39 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.10 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.29 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.94 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.26 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.86 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.80 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.10 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.44 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.77 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.88 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/EXOC4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.10 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.46 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.41 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.61 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.40 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.84 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 27.95 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.73 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.29 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.53 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.79 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.86 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DDX24.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.98 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.39 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.49 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.52 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.23 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.39 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.07 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.70 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 25.77 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 24.10 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.16 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 22.02 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RNPC3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.90 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.08 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.68 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.00 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.01 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 22.05 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 24.04 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 25.73 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 27.08 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 28.09 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 28.73 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 29.19 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/FAM207A.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.80 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.44 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.73 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.72 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.59 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.76 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.80 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.46 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.86 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.27 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 26.22 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:05, 24.31 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CXXC1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.07 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.38 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.96 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.33 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.82 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.70 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.94 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.90 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.38 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 25.35 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.97 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.63 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GAR1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.80 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.39 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 23.96 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.34 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.58 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.29 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.91 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.26 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.72 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.77 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.90 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.77 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SMC4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.70 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.46 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.77 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.28 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.16 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.16 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 30.20 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 30.15 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 30.15 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 30.19 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 26.87 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:05, 24.77 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SPTLC1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.79 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.59 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.21 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.02 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:13, 24.40 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 26.19 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.10 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 27.34 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.17 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 25.70 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.25 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.15 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GTF3C2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.62 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.11 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.28 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.23 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.95 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.21 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.11 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.58 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.93 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.16 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.45 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.53 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/AARS.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.90 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.16 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.93 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.65 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.09 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 25.89 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.05 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.01 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.69 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 25.98 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.04 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 22.96 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CHAF1B.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.74 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.00 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.50 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.32 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.25 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.55 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 26.84 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 24.73 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 23.37 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.83 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.56 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.00 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MRPS7.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.89 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.36 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.30 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.36 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.24 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.39 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.27 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.90 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 24.22 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 23.37 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 25.13 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.53 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPL24.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.85 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.26 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.83 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.29 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.76 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.52 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.68 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.67 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.31 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.84 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.15 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.35 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MTOR.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.07 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.42 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.99 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.16 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.16 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.00 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.25 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.07 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.96 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 23.17 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 24.85 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 26.38 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TOE1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.72 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.01 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.55 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.51 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.85 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.42 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.92 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.78 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 25.28 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 23.61 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.55 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 25.17 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PXN.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.88 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.34 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.50 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.37 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.20 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.40 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.11 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.71 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.12 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.33 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.57 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.72 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RBMXL1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:26, 17.58 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.03 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 19.82 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:19, 18.69 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:08<00:17, 19.17 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:15, 19.52 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:11<00:13, 20.17 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 22.49 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:08, 24.38 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 25.97 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:05, 27.23 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 28.20 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SENP6.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.04 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.35 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.96 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 24.90 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.62 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.26 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 23.20 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.00 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 24.87 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 26.33 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 27.50 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.08 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/H2AFZ.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.78 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.56 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.08 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.61 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.77 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.41 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.93 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 27.68 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 25.25 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 23.57 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.61 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 22.07 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TM7SF2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.73 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.20 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.85 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.54 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.90 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.70 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.07 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.00 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.48 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.05 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.43 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.63 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PRPF19.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.60 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.28 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.03 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.10 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.05 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.14 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.19 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.09 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.22 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 23.40 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 25.19 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 26.44 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/THAP1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.92 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.48 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.82 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.73 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.14 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.49 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.60 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.39 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.00 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.39 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.67 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 26.43 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DDX20.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.69 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.41 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.74 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.03 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.04 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.94 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.22 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 25.86 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:07, 27.15 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.28 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.80 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.25 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/EWSR1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.90 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.38 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.74 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.18 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.98 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.95 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.93 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.26 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 23.42 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 25.21 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:05, 26.50 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 24.80 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/C1orf109.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.09 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.63 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.56 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.37 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.42 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.85 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.85 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.64 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.35 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.77 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.99 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.98 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPL14.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.84 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 19.88 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.72 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.00 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.91 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.90 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.24 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.36 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 22.78 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 22.12 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 21.86 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 20.93 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PDPK1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.99 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.43 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.60 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.59 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.39 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.65 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.60 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.98 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 23.88 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 23.19 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.40 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.24 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PTCD1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.16 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.17 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.52 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.77 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.79 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 23.97 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.92 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.47 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 21.84 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.80 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.83 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 25.60 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MYCBP.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.05 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.27 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.95 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.22 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.15 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.14 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.11 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.01 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.03 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.05 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:06, 23.32 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:04, 25.10 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ZCCHC9.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.12 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.47 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.51 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.39 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.31 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.41 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 23.23 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 22.72 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 24.65 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 26.17 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.41 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.09 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RNF20.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.79 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.61 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.96 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 24.88 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.76 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.98 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.63 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.38 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.70 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 30.12 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 30.27 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 30.44 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CDCA8.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.03 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.51 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.13 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.37 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.16 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.78 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 23.09 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 24.99 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.31 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.32 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 28.33 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.92 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/METTL23.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.96 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.54 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.13 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.18 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.83 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.96 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.48 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.71 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.44 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 26.61 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.64 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.40 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CENPH.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.96 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.19 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.81 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.17 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.08 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.93 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.05 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.08 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 23.22 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 24.99 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:05, 26.49 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 27.55 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SRFBP1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.72 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.13 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.70 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.55 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.52 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.87 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.02 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.68 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 26.21 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 24.42 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.43 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.08 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPL10A.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.56 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.15 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.44 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.44 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.11 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.51 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 25.24 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.60 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.52 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 27.03 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 27.87 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 25.18 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RFC3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.69 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.20 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.35 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.18 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.82 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 25.03 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.61 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:11, 22.53 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.10 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 21.75 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.26 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 24.27 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GTF2H2C.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:20, 22.81 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 27.38 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.12 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.20 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.16 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.00 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 30.14 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 30.36 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 30.39 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 27.17 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:06, 25.08 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:05, 23.71 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NFYC.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.02 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.61 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.17 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.47 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.97 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.25 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.29 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.64 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.00 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.55 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.51 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.24 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/KLC2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.66 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.42 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.13 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.26 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.00 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.27 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.16 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.74 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.26 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.53 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.84 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.88 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GFER.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.80 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 21.08 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.81 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 27.09 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 28.15 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 25.58 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.89 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 22.93 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 22.55 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 24.47 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.05 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.15 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/REXO2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.99 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.56 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.34 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.43 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.90 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.43 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.93 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.01 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.71 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.05 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.42 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.67 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CCDC84.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.83 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.39 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.11 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.27 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.92 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.99 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.89 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 20.97 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.04 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.69 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 21.67 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.45 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MED6.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.07 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.51 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.48 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.55 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.19 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.66 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.37 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.17 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.53 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.89 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.95 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 30.16 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/POLR1C.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.02 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.44 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.52 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 21.91 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.64 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.06 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 23.34 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 25.24 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.90 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 22.90 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.26 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.00 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CAP1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.72 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.23 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.71 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.11 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.64 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.80 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.39 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.63 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.50 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.15 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.67 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.97 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SPATA5.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.07 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.53 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.16 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.29 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 24.60 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.31 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.55 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.41 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 29.09 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.55 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.76 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.99 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MRPL27.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.03 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:23, 18.82 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 20.08 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.24 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.67 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.60 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.84 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.91 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.40 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.91 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.42 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.73 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RBM10.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.63 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.47 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.01 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 24.89 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.53 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.83 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 25.64 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.81 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 22.84 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.20 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.33 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.43 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CCAR1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.02 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.24 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.86 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.41 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.89 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.73 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.11 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.03 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.70 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.29 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.52 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 26.50 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DDX5.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 16.98 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.39 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.31 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.50 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.22 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.17 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.60 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.72 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.48 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 26.83 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.75 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.38 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NDUFB6.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.04 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.48 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.08 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 24.95 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.47 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.74 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.56 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 27.72 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.54 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.07 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.40 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.57 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MED7.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.94 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.71 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.33 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.26 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.20 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.48 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.36 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.15 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.46 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.82 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 26.17 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:05, 24.48 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CHEK1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.62 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.51 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.21 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.44 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.41 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 28.11 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 25.39 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.86 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 22.60 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 21.96 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:07, 21.79 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.76 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CKAP5.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.82 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.32 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.30 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.45 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.30 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.47 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.27 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.79 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.36 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.13 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.57 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.65 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DPAGT1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.25 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.64 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 22.52 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.30 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.01 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 28.05 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.72 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.28 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.68 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.93 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.98 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 30.09 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CUL3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.88 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.35 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.82 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.76 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.53 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.57 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.44 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.13 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.54 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.74 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.99 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 30.12 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CCNB1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.80 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.53 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.48 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.45 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.28 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.59 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.16 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.00 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.85 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.30 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 26.32 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:05, 24.54 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SRSF1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.81 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.34 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.66 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.38 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.02 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.21 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 23.49 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 22.89 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.26 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 22.20 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 24.12 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 25.53 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPUSD4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.10 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.48 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.95 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:16, 21.90 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:13, 24.48 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 26.16 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.27 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.22 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.90 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.36 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.65 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.85 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TOMM40.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.00 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.77 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.58 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.97 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:17, 20.02 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:15, 20.14 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.40 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:12, 20.47 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.82 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 22.96 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:06, 22.64 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 22.26 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/COX7B.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.59 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.03 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.55 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 20.60 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.63 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:15, 20.52 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.56 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 20.72 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 22.89 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 24.63 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:05, 26.15 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 27.21 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SEC61A1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.62 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.09 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.92 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.00 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.70 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.98 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.00 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.74 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 26.14 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 24.18 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.97 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 22.22 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TAMM41.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.70 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.30 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.87 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 24.83 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.21 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.68 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.75 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.25 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.71 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.12 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.61 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.85 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ZNHIT2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.93 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.67 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.53 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.60 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 22.63 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 22.03 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.71 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.45 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.34 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 23.45 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.65 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.29 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ISCU.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.58 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.17 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.62 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.85 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.29 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.55 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 25.30 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.56 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.52 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.24 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.65 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.96 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/URB1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.90 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.24 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 20.20 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.44 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.45 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.90 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.86 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.68 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.11 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.51 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.85 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.96 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/EXOC3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.95 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.50 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.09 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.02 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:14, 23.53 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.50 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.01 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 22.68 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.60 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 26.10 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.17 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.30 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ALG1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.01 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.45 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.07 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 24.85 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.57 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.68 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.50 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.05 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 26.43 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 24.45 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 24.09 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 25.77 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ARL2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.72 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.07 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.56 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.99 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.92 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.17 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.90 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 27.15 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.04 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.72 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.16 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.39 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PRMT1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.96 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.31 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.29 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.30 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.13 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.09 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.16 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.11 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.82 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.14 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.47 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.72 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NOC4L.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.65 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.34 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.01 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.37 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.81 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.62 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.05 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.06 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.67 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.80 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.23 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.75 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/VMP1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.72 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.18 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.05 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.12 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.77 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.87 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.89 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.62 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 26.93 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 27.88 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.58 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.03 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/EPRS.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:17, 26.40 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.89 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.88 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 28.67 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:13, 25.57 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 24.01 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:12, 23.12 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:09, 24.96 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:08, 26.51 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 27.56 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 28.18 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 28.91 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CNOT1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.00 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.46 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.14 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.80 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.11 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.81 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.89 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.68 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.28 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.67 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 25.36 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 26.45 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ACIN1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.66 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.46 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.98 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 24.85 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.56 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.66 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.39 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.91 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.32 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.61 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.81 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.85 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CDK7.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 16.85 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.27 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.31 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.87 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 22.09 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.35 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.11 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 27.34 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.23 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 23.97 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.93 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.28 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MRPL43.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.12 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.63 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.23 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.08 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:14, 23.76 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.79 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.12 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.86 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:10, 21.67 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 23.73 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 25.43 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.73 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MOB4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.83 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 21.16 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.70 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.63 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 24.47 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.10 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.19 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.33 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 21.80 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 21.61 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.59 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 25.20 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/UBA3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.82 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.23 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.32 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.41 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.22 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.22 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.81 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.89 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.57 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 26.81 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.79 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.51 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/UBTF.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.79 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.45 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.99 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.26 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.23 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.86 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 22.91 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 24.89 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.37 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.46 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 28.40 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.12 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/BRD4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.06 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.42 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.01 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.22 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.11 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.06 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.02 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.34 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 23.48 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 25.12 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:05, 26.57 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 27.58 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ORC5.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.95 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.45 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.42 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.03 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 22.92 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.83 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.49 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 24.49 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.42 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.14 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.00 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.30 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NDUFAF3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.77 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.12 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.86 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.67 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.21 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.15 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.46 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.64 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.28 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.61 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.06 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.04 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/XRCC3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.91 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.51 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.57 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.61 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.31 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.14 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.90 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.33 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.25 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.98 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.01 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.22 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CENPE.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.70 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.27 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 22.49 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.41 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.90 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 24.43 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.31 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.12 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 26.70 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 27.71 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.46 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.99 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TRNT1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.89 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.46 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.78 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.67 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.35 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.30 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 23.18 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.50 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.06 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.45 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.42 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 25.87 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RAD51C.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.80 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.33 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.58 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.69 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.40 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.31 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.08 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.84 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 27.12 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.03 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.74 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.28 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MCMBP.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.84 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:18, 23.57 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:15, 26.65 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:13, 28.44 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 29.00 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 29.58 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 26.35 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:10, 24.29 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 23.32 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:08, 22.49 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:07, 22.09 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 21.66 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NHLRC2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.95 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.56 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.89 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.41 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:12, 26.61 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 24.44 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:12, 23.23 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 22.68 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 22.12 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 21.84 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:07, 21.88 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.87 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CTPS1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.58 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.28 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.68 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.46 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 24.86 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.48 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.50 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.26 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.84 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.29 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.60 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.77 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TEX10.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.99 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.61 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.72 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:14, 25.55 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:14, 23.81 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 24.39 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 25.96 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:09, 27.29 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.12 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 25.62 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 23.89 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 22.92 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GEMIN7.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.40 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.50 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.42 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.19 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 29.90 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 29.95 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 29.91 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 29.98 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:08, 26.69 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:07, 24.55 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:06, 23.73 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 25.30 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PRIM1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.83 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.57 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 20.01 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.04 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.49 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.39 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.94 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.91 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.69 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.13 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 26.41 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 24.64 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RGP1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.63 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.37 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.17 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.75 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.34 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.33 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.18 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.73 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.20 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 25.16 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 23.71 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 22.81 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/VPS13D.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.93 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.09 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.17 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.25 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.94 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.37 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.27 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.93 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.33 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.54 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.81 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.95 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SBDS.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 16.85 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.44 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.92 examples/s][A

Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 25.57 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 26.69 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 27.45 examples/s][A
Map (num_proc=16):  81%|████████▏ | 407/500 [00:15<00:03, 28.27 examples/s][A
Map (num_proc=16):  88%|████████▊ | 438/500 [00:16<00:02, 25.65 examples/s][A
Map (num_proc=16):  94%|█████████▍| 469/500 [00:18<00:01, 23.95 examples/s][A
Map (num_proc=16): 100%|██████████| 500/500 [00:19<00:00, 25.09 examples/s][A

Saving the dataset (0/1 shards):   0%|          | 0/500 [00:00<?, ? examples/s][A
Saving the dataset (1/1 shards): 100%|██████████| 500/500 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/COQ4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.17 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.40 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.02 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.38 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.87 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.70 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.12 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.14 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.73 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.76 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.15 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.71 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NCKAP1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.91 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.35 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.26 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.05 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.29 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.29 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.13 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.14 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.12 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.39 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.47 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 25.15 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/AATF.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.58 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.28 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.09 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.07 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 28.84 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 28.91 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 29.25 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 29.37 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 29.56 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 29.71 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 29.83 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:12<00:04, 29.90 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/VPS37C.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.12 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 20.86 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.60 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.89 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 22.84 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.41 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.29 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.88 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:10, 21.64 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 23.84 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 25.56 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.90 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RAD51D.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.78 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.72 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.83 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.31 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.33 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.31 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 30.37 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 30.19 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 30.29 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 30.27 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 27.14 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 25.03 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TIMM9.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.73 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.15 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.59 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.79 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.73 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.05 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.08 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.57 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 24.00 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.96 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.64 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 24.44 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/KAT8.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.11 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.62 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.39 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.71 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.93 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 25.55 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.93 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.07 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.32 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.30 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.23 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.88 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TTF2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.91 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.50 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.30 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.72 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 24.43 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.37 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.47 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 22.56 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.31 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 21.85 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.70 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.48 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PRMT5.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.78 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.12 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.69 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.37 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.76 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.51 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.95 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.87 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.49 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.15 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 25.32 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.55 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ACTR10.dataset
Creating dataset.



Map (num_proc=16):  94%|█████████▍| 469/500 [00:18<00:01, 24.66 examples/s][A
Map (num_proc=16): 100%|██████████| 500/500 [00:19<00:00, 25.08 examples/s][A

Saving the dataset (0/1 shards):   0%|          | 0/500 [00:00<?, ? examples/s][A
Saving the dataset (1/1 shards): 100%|██████████| 500/500 [00:00<00:00, 24572.93 examples/s][A
  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PRPF40A.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.04 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.49 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 22.79 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.41 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:14, 23.65 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.67 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.58 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 24.57 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.22 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.48 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.61 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 24.53 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SNRPD2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.97 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.47 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.22 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.78 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.83 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.11 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 23.28 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 25.09 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.44 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.35 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 28.06 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 25.85 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/UBR4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.99 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.56 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.80 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 24.80 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.56 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.77 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.44 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.76 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.15 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.55 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 28.15 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.59 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPE.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.85 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.40 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.25 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.03 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.73 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.76 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.26 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.89 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 26.16 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 24.17 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.16 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 22.66 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SF3B1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:28, 16.60 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.29 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 23.16 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.60 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.00 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 24.89 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.95 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.45 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 26.76 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 27.84 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.59 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.99 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NDUFA8.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.58 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.40 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.53 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 29.97 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:13, 25.82 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 23.95 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:12, 22.82 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:11, 22.03 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:10, 20.77 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 20.69 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 20.73 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 20.94 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/URB2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.32 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:18, 23.22 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:15, 26.20 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:13, 27.73 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:11, 28.47 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 25.48 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:11, 23.81 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 24.18 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 25.87 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 23.89 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.81 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 22.19 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SNAPC1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.48 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 19.97 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 23.87 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.07 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.32 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 28.04 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.69 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:09, 25.49 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 24.51 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 23.61 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.82 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 22.01 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/OSBP.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.84 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.30 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.77 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.98 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.17 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.43 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 25.16 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.47 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 26.62 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.86 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.39 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.09 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GPN1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.03 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.28 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.48 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.42 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.12 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.26 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.10 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.65 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.12 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.33 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.37 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.40 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DIMT1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.71 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.33 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.81 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.10 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.04 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.30 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 25.20 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.65 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.60 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.30 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.65 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.92 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ORC4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.60 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.03 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.58 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.91 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:17, 20.02 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:15, 20.21 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.39 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:13, 18.60 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:14<00:11, 19.15 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:15<00:09, 19.60 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:17<00:07, 20.07 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:18<00:05, 22.10 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TICRR.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.91 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.34 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.09 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.28 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.93 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.13 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 23.11 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 25.06 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.45 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 22.53 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.38 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 25.08 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GET3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.66 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.20 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.82 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.06 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.02 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.94 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.67 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.76 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.42 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 26.57 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.54 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.42 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PTMA.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.63 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.24 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 20.13 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.68 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.74 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:15, 20.55 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.63 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 22.11 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 24.06 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 25.62 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.96 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 22.85 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/VPS29.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.61 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.30 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.02 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.97 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 22.15 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.62 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.21 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.24 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.17 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.17 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.44 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.87 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CNOT3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.85 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.17 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.09 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.30 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.18 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.98 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.82 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.21 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 23.27 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 24.94 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.72 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 22.68 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DERL2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 17.00 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.39 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 20.13 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 20.51 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.61 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:15, 20.61 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.67 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 20.67 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.77 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:15<00:08, 20.89 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:06, 22.99 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:04, 24.82 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SON.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.99 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.14 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.75 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.87 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.62 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:15, 20.63 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.77 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 20.69 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 19.88 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:15<00:09, 20.18 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 20.55 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 22.71 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GMPPB.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.73 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 19.94 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.42 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.10 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.56 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.43 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.12 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.49 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 23.74 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.68 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:07, 21.93 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.97 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/VARS2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 16.96 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.51 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.00 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 24.82 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.31 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.62 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.23 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.74 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.19 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 26.89 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 24.86 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.35 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SRSF7.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.63 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.25 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.79 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.01 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.96 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.87 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.82 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 20.80 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.08 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.94 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.88 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:04, 25.55 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/COG1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.51 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 27.98 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.40 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 29.87 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 29.91 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.02 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 29.98 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 29.99 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 29.89 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 29.91 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 30.03 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:12<00:04, 29.97 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NFS1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.87 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.12 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.90 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.10 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.76 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.02 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.93 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.42 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.64 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.86 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.40 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.92 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/POLD3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.04 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.41 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.18 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.12 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.92 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.55 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.01 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.01 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 23.51 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.55 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.41 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.34 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/OIP5.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.72 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.27 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.68 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.98 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.96 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.89 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.34 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.34 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.15 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:15<00:08, 21.03 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 21.18 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 23.27 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SLC25A3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.91 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:19, 21.85 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 25.21 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 27.12 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 28.17 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.48 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.90 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:09, 26.04 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 24.04 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 23.60 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 25.09 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 26.25 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TRPM7.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.61 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.26 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.91 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.92 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 24.46 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.94 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 23.08 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 22.80 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.15 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.02 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 25.45 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.58 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NACA.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.45 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.27 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.23 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.19 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.94 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 23.99 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.82 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.07 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:10, 21.69 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 23.74 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.76 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 25.80 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SOD2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.87 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.39 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.33 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.43 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.94 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.77 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 25.58 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.93 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 22.78 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 21.89 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:07, 21.45 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.17 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ATP6V0D1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.83 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.14 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 19.97 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 23.19 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 22.48 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.87 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.54 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.19 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.92 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 20.79 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 20.48 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:06, 20.60 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SARS2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.76 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 20.76 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.39 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.54 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.57 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.22 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.78 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.12 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 27.91 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 28.54 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.04 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.21 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DNAJC11.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.63 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.01 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.10 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.08 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.84 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.76 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.65 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.21 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.99 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.57 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.84 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.61 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PPIL4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.73 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.10 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.91 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.55 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.91 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.72 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.18 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.14 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.92 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.70 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.77 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.90 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ECD.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.73 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 21.28 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.91 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.88 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:14, 24.08 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.89 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.72 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.41 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 25.15 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 26.50 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 27.42 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.03 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NCL.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.37 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.16 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.71 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:16, 21.96 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:13, 24.37 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 25.96 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.20 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 24.81 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.42 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.58 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 22.07 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.05 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/EIF4G2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.80 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.08 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.75 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.00 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.20 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.54 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 25.28 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.77 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.63 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.38 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.92 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.24 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PPP2R1A.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.04 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.32 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.60 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.40 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 22.54 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.50 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.97 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 24.19 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.08 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.16 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.66 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.47 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MVK.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.80 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.08 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.76 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.83 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.02 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.25 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 22.27 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.50 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.16 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 20.79 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 20.85 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 20.86 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CEP192.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.61 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.11 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.15 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.10 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.60 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.88 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.75 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.22 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.60 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.00 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 25.73 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.88 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TBCA.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.61 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.08 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.17 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.74 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 22.80 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.98 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 22.41 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.96 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.47 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.16 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.08 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 23.18 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PFDN1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.80 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.28 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.16 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.46 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.54 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.26 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.87 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.28 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:08, 26.38 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 24.21 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 23.03 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 22.56 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CCNA2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.67 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.04 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.89 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.86 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.63 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.60 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.80 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.62 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.36 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.85 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.16 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.19 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ILF3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.94 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.15 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.80 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.08 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 22.06 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.07 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.62 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.92 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.65 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.91 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.97 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.81 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PRELID1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.79 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.19 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.61 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.58 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.27 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.12 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 27.93 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.58 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.04 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.23 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.38 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.47 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CHTOP.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.74 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 19.94 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.27 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 24.85 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.43 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.29 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.40 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.39 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.12 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 26.23 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.14 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 24.83 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ERCC3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.81 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.26 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.91 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.47 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.84 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.54 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.78 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.87 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 25.88 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.15 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.89 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.09 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TRAPPC8.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.47 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.81 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.40 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.79 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.25 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.03 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.31 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.96 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.81 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.53 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.37 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.55 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SYF2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.75 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.08 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.73 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.38 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.23 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 23.90 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.91 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.39 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 21.75 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.48 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.65 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.53 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CASP8AP2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.80 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.13 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.65 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.91 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.99 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.26 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.35 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.89 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.21 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 23.11 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.21 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 24.12 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPL35A.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.14 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:18, 23.50 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 22.44 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.23 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.71 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.50 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.63 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.27 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 26.53 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 27.50 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.06 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.64 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPL23.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.36 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.19 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 19.61 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 20.20 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.37 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.74 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 24.65 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 26.20 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 27.05 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.84 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 28.32 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.68 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MRPS28.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.57 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.31 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.36 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.53 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.30 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.35 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 23.50 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 25.10 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.39 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.30 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.93 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.65 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ATP6V1H.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.77 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.16 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.73 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.51 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.70 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.46 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.57 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 27.49 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.15 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.73 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.08 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.14 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CDC73.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.41 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.04 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.58 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 20.62 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.51 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:15, 20.37 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.42 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:12, 20.53 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.60 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:15<00:08, 20.75 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 20.73 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:18<00:05, 20.78 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CTR9.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.67 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.06 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.53 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.76 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.81 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:15, 20.43 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 22.90 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 24.83 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.34 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.39 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 28.08 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.87 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PSMD12.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.79 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.32 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.88 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.75 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.50 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.57 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 25.04 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.69 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 22.94 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 23.40 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.82 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.11 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CDC7.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.76 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.09 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.71 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.29 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.69 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.28 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.60 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.60 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.28 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 25.48 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.95 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 22.85 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ALDOA.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.98 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.53 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 22.56 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.55 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.55 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.32 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.18 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.19 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.00 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.07 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 21.06 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.12 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/UPF2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.67 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.88 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.87 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.23 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:13, 26.16 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 23.90 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:12, 22.95 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 22.99 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 24.85 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 26.31 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 22.89 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 22.27 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/BARD1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.71 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.18 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.25 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.11 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.95 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.08 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.64 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.35 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.62 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 28.87 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 26.06 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:05, 24.21 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MYBL2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 17.07 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.47 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.46 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.10 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.55 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.50 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.78 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.80 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.61 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.85 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.15 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 26.33 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MRPL1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.56 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.54 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.73 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.39 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.18 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.23 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:10, 26.74 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:10, 24.02 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:08, 25.60 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 26.92 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 27.74 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 28.31 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPL38.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.70 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.09 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.09 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.09 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.83 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.16 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.00 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.63 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.96 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.50 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.56 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.97 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/BUB3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.56 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.20 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.14 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.27 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.14 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.42 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.42 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 22.57 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.95 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.80 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.24 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 24.85 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MRPL54.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.03 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 20.83 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.61 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.44 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 22.60 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.77 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.57 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.25 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 26.63 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 27.73 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.50 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.99 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TIMM22.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.74 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.35 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.06 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.93 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.81 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.05 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.86 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.40 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.02 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.22 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 26.55 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:05, 24.66 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NUP214.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.83 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.56 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.06 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.02 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.65 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.55 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.45 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.12 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.66 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.71 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.92 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 30.09 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NEPRO.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.71 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.33 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.74 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.44 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.45 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.87 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.81 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.54 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.06 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.47 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.54 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 26.66 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/KDM2A.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.90 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.44 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.01 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 24.87 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.44 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.62 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.40 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.00 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.50 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.71 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 28.45 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 28.93 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NUF2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.64 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.26 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.72 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.00 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.42 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.67 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.42 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.88 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.80 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.55 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 25.80 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 24.31 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/FARSA.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.77 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.32 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.07 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.20 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.69 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.55 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.64 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.81 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.48 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 26.71 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.89 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.58 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/KCMF1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.92 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.56 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.26 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.69 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 24.69 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.45 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:13, 21.33 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.24 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:10, 21.45 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 23.59 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 25.28 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.65 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/YARS.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.88 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.57 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.15 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.31 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.29 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.76 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 22.09 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.20 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.19 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.86 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.97 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.07 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TARS.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.65 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.37 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.05 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.37 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:14, 24.26 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.00 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.21 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.90 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:10, 21.60 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.34 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.52 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.45 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MED20.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.94 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.61 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.57 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.79 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.62 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.80 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.93 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 22.96 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.74 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 26.28 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.36 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.14 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ARPC4.dataset
Creating dataset.



Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.42 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.65 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.59 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.33 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.18 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 21.79 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.44 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.23 examples/s][A
Map (num_proc=16):  81%|████████▏ | 407/500 [00:17<00:04, 23.21 examples/s][A
Map (num_proc=16):  88%|████████▊ | 438/500 [00:18<00:02, 24.83 examples/s][A
Map (num_proc=16):  94%|█████████▍| 469/500 [00:20<00:01, 25.02 examples/s][A
Map (num_proc=16): 100%|██████████| 500/500 [00:21<00:00, 23.46 examples/s][A

Saving the dataset (0/1 shards):   0%|          | 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/KIF23.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.33 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 20.98 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.74 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.93 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 28.12 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 25.14 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.08 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.74 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 26.98 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 27.98 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.72 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.14 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPS19.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.81 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.30 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.64 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.39 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.41 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.74 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.79 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.51 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.08 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.41 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.58 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.70 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/FARSB.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.84 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.34 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.91 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.84 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.11 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.75 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.02 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.98 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.59 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 25.59 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.65 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 22.99 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GABPA.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.44 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.55 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.71 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:14, 26.00 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:14, 23.84 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:13, 23.44 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:11, 25.14 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:09, 26.60 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 27.57 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 28.20 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 28.97 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.35 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/HNRNPR.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.62 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:16, 26.31 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:14, 28.31 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 29.49 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 29.71 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 29.78 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 29.96 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 29.96 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 30.06 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 30.21 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 30.26 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:12<00:04, 30.08 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MRPL24.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.69 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.21 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.91 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.68 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.10 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.77 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.06 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.05 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.34 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.08 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.62 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.10 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MYC.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.88 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.42 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.02 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.39 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.08 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.17 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 23.32 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 25.23 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.65 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.77 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.55 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.74 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPAP1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 16.96 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.17 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 20.16 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.91 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.89 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:15, 20.61 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.52 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:12, 20.59 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.54 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:15<00:09, 20.41 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 20.53 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:18<00:05, 21.15 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NAE1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.23 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.53 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.92 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 24.98 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.77 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 24.72 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.35 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.48 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.43 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.94 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.19 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.02 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MMS22L.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.60 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.25 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.91 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:16, 21.88 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.22 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.82 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.63 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.45 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.70 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 21.93 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.69 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.52 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NUDT21.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.76 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.19 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.17 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.21 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.00 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.24 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 23.10 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.36 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 21.89 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.65 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.41 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.35 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ZNF100.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.59 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:19, 22.79 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 22.70 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.44 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.70 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.08 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.07 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.84 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.36 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.65 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.91 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 26.76 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NDUFB3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.53 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.79 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.64 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.27 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.15 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.05 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 30.09 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 30.11 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 30.15 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 30.14 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 30.10 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:12<00:04, 28.63 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MRPL35.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.87 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 20.85 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.55 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.81 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.94 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.71 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 25.92 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 24.33 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 23.20 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:08, 23.03 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 24.81 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.20 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DDB1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.65 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.33 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.35 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.06 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.60 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.50 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.98 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.93 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.61 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.22 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.56 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.82 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CS.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.71 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:18, 23.53 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:15, 26.36 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:13, 28.00 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 28.67 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 29.01 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 29.43 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.63 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.50 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 28.16 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 28.74 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.05 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CAMLG.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.93 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.57 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.31 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.60 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.73 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.36 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 25.82 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 24.00 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 22.69 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.00 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:07, 21.66 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 22.52 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GTF2H3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.94 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.47 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 22.49 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.23 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.64 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.54 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.89 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.51 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 23.05 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 24.87 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 26.34 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.40 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GRWD1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.60 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 19.95 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.24 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 20.36 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.17 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:16, 18.93 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:11<00:14, 19.31 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:12, 19.46 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:14<00:11, 19.63 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:15<00:09, 19.64 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:17<00:07, 19.71 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:19<00:06, 19.82 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/EMC1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:26, 17.54 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.01 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:05<00:20, 19.60 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 19.89 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:08<00:17, 19.92 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:15, 19.90 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:11<00:13, 19.97 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:12, 20.15 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:14<00:10, 20.92 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:15<00:08, 23.08 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:06, 22.49 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:18<00:05, 21.94 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPP40.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.81 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.28 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.97 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.33 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.77 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 23.97 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.70 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 24.22 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.29 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.47 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.95 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.66 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TXNL4A.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.61 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.07 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.91 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.00 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.88 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.33 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 23.08 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.29 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.68 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.27 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.91 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.41 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/LIMS1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.83 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.55 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.65 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.20 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.23 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.15 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.19 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.71 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.90 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 24.46 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.66 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.80 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/VEZT.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.96 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.39 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.90 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.28 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.15 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.10 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.12 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.15 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.16 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.36 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.49 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 25.25 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RAN.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 16.79 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.38 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.30 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:16, 21.88 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.66 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.64 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 23.83 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 25.52 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.94 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 22.96 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.29 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.20 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RSL24D1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.86 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.40 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.87 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.13 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.96 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.03 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.66 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.57 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.13 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 23.27 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.78 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 22.17 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SRRT.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.82 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.35 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.18 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.71 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.44 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 22.11 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.18 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.85 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 27.10 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.04 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 25.33 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.81 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/LSM7.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.94 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.51 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.02 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.20 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.12 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.12 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.60 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.77 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.49 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 26.88 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.83 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.50 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CCDC137.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.69 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.18 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.98 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.49 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 22.82 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.90 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.49 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.70 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.54 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.00 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.48 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.68 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PSMD4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.00 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.47 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.05 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.12 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.90 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.96 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.08 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.29 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.06 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 23.92 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.61 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 22.39 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MCM6.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.99 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.45 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.37 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.41 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.27 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.49 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.29 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.90 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.46 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.75 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.97 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 30.06 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CYC1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.40 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:20, 21.51 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.56 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 21.91 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:14, 24.23 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 25.91 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.28 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.26 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.53 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.20 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.85 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.58 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ZRANB2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.78 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.34 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.93 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.16 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.28 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.60 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.45 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.87 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.77 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 27.18 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.11 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.74 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/THOC3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.92 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.24 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.31 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.20 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.04 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.18 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.78 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.29 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.02 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.72 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.14 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.18 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/KARS.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.93 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.32 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.93 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 24.85 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.49 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.61 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.43 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.47 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 23.67 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.91 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.74 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.14 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DNAJC9.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.65 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.21 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.74 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.12 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.29 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.94 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.23 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.91 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 27.01 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 27.94 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 28.66 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 29.16 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ALG11.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.78 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 21.05 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.75 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 27.19 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 24.74 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.27 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.42 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.23 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 26.75 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 24.67 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.54 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 25.29 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SETDB1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.00 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.52 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.20 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.00 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.57 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.39 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 23.15 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:11, 22.43 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 21.90 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 21.75 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.18 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.02 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ELOVL1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.92 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:18, 23.75 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:15, 26.66 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:13, 28.06 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 28.89 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 28.09 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 25.53 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:10, 23.98 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 22.95 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:08, 22.87 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 24.76 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 26.20 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NUP107.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.66 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 20.80 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.40 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.68 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.85 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.64 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 29.09 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.52 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.84 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.91 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 26.89 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 24.98 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NASP.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.01 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 21.12 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.70 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.25 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 22.15 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.96 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.95 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.44 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 24.07 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.96 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.49 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 24.32 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/INCENP.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.86 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 19.96 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.65 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.05 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.86 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:15, 20.02 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.35 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 20.92 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 23.20 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 25.00 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:05, 26.34 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 27.40 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NARS.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.75 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.31 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.98 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.04 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.90 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.20 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 23.08 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.26 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 21.74 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.66 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.61 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.48 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MRPL36.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.89 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.32 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.09 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.36 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.55 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.39 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 25.81 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 24.12 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 23.28 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 25.00 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 26.40 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 27.48 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/WDR70.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.53 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.29 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 23.27 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.57 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.77 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.13 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.71 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.06 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.33 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.23 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 25.73 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.77 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DNM1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.49 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.27 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.23 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.22 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.97 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.14 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.08 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:09, 27.30 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.15 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.34 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.81 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.20 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CHMP1A.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.79 examples/s][AIOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

[A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.11 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.30 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 21.19 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.53 examples/s][A
Map (num_proc=16):  81%|████████▏ | 407/500 [00:18<00:03, 23.53 examples/s][A
Map (num_proc=16):  88%|████████▊ | 438/500 [00:19<00:02, 25.19 examples/s][A
Map (num_proc=16):  94%|█████████▍| 469/500 [00:20

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SLC1A5.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.95 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.41 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.45 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.46 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.15 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.55 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.28 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 22.59 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.14 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.25 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.36 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 25.13 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TWF1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.93 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.45 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.05 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.05 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.00 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.26 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 23.15 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 22.85 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.36 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 21.88 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.19 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 24.21 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/UQCRH.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.60 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.58 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.59 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.21 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.30 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:11, 26.79 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:11, 24.14 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:10, 23.08 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 22.64 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:08, 22.41 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:07, 21.80 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 21.53 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CPSF3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.95 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.41 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.05 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 24.94 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.71 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.61 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.77 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.31 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 21.89 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 21.72 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.77 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 25.45 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RTCB.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.95 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.46 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.06 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.16 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.08 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.31 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.96 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.53 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.66 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.57 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.09 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.21 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SMUG1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.07 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.13 examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.82 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.36 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.00 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.24 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.20 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 22.00 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 23.20 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 25.01 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.44 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.60 ex

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/FEN1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.04 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 21.09 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.61 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 27.08 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 28.16 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.77 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 25.56 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 24.40 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 26.00 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 27.18 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.19 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.76 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CBX1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.97 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.49 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.06 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.30 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.13 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.64 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.48 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.89 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.85 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.61 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.10 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.52 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/XRCC2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.61 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.53 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 23.12 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.78 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.18 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 28.08 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.67 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.19 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:08, 26.01 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 24.31 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 23.10 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 22.45 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PNN.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.93 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.27 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.22 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.24 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.05 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.38 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.30 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.85 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.40 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 25.13 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 23.61 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.71 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ZC3H18.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.85 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.35 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.00 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.87 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.24 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 25.84 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.21 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.16 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.82 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.23 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 25.92 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 24.24 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GTF2H4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.66 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.37 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.30 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.42 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.04 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.29 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.18 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.54 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.24 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.06 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.74 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.94 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MRPL32.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.46 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.47 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.62 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.26 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:13, 25.76 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 23.98 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 21.95 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 24.10 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 25.73 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 26.95 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 27.75 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.47 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RFC4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.97 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.46 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.03 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.73 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.16 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 26.01 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.27 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.17 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 25.74 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.10 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.12 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 22.46 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/HAUS4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.05 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.60 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.52 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.81 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 28.11 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.92 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 29.22 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.54 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:08, 26.71 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 24.74 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 23.46 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:05, 23.38 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CSNK2B.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.83 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.55 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.12 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.12 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.44 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.35 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.83 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.81 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.64 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.09 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.48 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.72 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MFN2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.87 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.40 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.97 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.64 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 24.82 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.44 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.75 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.44 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.98 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.36 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.72 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.84 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/UFL1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.68 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.54 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.72 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.09 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.16 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.25 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 30.26 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 30.27 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 30.32 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 30.37 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 30.37 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:12<00:04, 30.24 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/WDHD1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.68 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.20 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 20.17 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 20.52 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.41 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:15, 20.65 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.50 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:12, 20.58 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 22.80 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 24.55 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:05, 25.97 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 27.22 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SRP19.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.05 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.47 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.04 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.34 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.46 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.78 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.64 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.97 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.05 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.73 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 27.89 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.61 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SAP30BP.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.86 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.44 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.09 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.15 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.00 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.39 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.25 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 22.89 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.04 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 21.92 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.92 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.28 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RNF123.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.62 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.37 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 19.82 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 23.24 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.22 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.48 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.75 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.13 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 23.49 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.59 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.30 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 24.18 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TOMM20.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.90 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.34 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.22 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.24 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.83 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.18 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.17 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.82 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.30 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 25.86 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 23.05 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.55 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/METAP2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.63 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.39 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.93 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.73 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.17 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.81 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.02 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.85 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.66 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.17 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.38 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 26.27 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SMG5.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.54 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:23, 18.62 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 19.85 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.77 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.91 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.97 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.99 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 20.84 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.88 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:15<00:08, 21.02 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:06, 23.14 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:04, 24.90 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/COG2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.73 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.11 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.72 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.66 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.48 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.22 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.04 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 20.97 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.71 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:15<00:09, 20.25 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 20.80 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 22.83 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ELP6.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.64 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.22 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.78 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.75 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.63 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.67 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.67 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.28 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.78 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 26.12 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 24.15 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.12 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ABCB7.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.78 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.37 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.82 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.17 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.19 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.49 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 25.16 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.38 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.87 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 23.61 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.58 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.97 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPL12.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.05 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.30 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.90 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:16, 21.98 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:13, 24.37 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.96 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.42 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 27.52 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 25.43 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.08 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.59 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.11 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NELFCD.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.02 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.34 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.49 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.45 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.23 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.29 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.10 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.75 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 26.22 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 24.46 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.23 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 22.45 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ARID3A.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.98 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.49 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.93 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.79 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:14, 23.56 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.79 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.01 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.62 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:10, 21.24 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.09 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.48 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.54 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PDAP1.dataset


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.97 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.43 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 19.82 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 20.26 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.49 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:15, 20.63 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.55 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:12, 20.66 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.74 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 22.97 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 24.78 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 26.14 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NUP62.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.65 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.06 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.06 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.33 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.69 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.44 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 29.00 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.36 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.73 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 28.54 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.06 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.32 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CYFIP1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.66 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.20 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.22 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.20 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.97 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.24 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.20 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.80 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.30 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.39 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.69 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.93 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/UXS1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.57 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 19.91 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.57 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.31 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.70 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.45 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.98 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.09 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.13 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.76 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.95 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.84 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PHF5A.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.70 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.36 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.96 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.14 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.96 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.98 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.05 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.89 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.93 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.59 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.23 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.10 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PPME1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.78 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.28 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.58 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.44 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.98 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.32 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.18 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.73 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.12 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.38 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.53 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.68 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TTC4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.05 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.57 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.84 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.79 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.49 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.66 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.59 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.14 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.56 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.81 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.97 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 30.16 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TRAPPC3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.08 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.54 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.76 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.46 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.55 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.02 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 25.11 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.77 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.86 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 23.46 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 25.17 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.66 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PCNA.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.02 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.54 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.04 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:17, 21.81 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.55 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.36 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.49 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 25.32 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.60 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.74 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 28.46 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.65 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/COG3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.03 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.50 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.97 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.25 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.07 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.04 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.40 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.67 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.38 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 26.82 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.73 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.61 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/INTS7.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:26, 17.78 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.54 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.57 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.95 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.15 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.68 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.59 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 27.04 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.00 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.72 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.16 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 24.49 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/COX6C.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.21 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.67 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.86 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 24.93 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.44 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.70 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.62 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.18 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.61 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 28.51 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.11 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.47 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NOP56.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.97 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.19 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.79 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.13 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.17 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.13 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.74 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.82 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.52 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.29 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.15 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.44 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DPH3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.90 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.25 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.74 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.90 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.89 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.09 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 27.94 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.68 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.12 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.46 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.71 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.97 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TSR2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.49 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.01 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.60 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.45 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.20 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.01 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.19 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.01 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.63 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.03 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.28 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.49 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ATP6V1E1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.80 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.80 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.92 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.45 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.48 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.53 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 30.45 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 30.41 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 30.46 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 30.61 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 30.76 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:12<00:04, 30.48 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/UTP23.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.79 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.54 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.53 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:14, 25.50 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:14, 23.57 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:13, 22.92 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:11, 24.91 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.77 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 22.91 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.39 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.17 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.76 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/UNC45A.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.75 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.48 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.13 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 24.96 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:14, 23.69 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.83 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.21 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.11 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.15 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.68 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.97 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.82 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/POLR1B.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.39 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.23 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.29 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 29.80 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 29.85 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 29.82 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 29.88 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 29.96 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 29.92 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 29.85 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 29.83 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:12<00:04, 29.86 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/INTS6.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.74 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.08 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 19.75 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.98 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 24.96 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 26.44 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.56 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.25 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.90 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.14 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.46 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.71 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TIMM23B.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.02 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.55 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.36 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.61 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.31 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.47 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.76 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.50 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.91 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.97 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.44 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.19 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/COQ5.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.86 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.49 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.00 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.15 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.10 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.73 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.97 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 25.69 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.98 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.87 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 28.52 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 29.12 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/STX5.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 16.94 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.46 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.22 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.27 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.12 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.44 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.34 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 22.94 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.74 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 26.25 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.40 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 24.85 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SNRPF.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.56 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.26 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 23.89 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.39 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.64 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 28.09 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.54 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.92 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 27.94 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 25.71 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 23.60 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 22.77 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/IPO11.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.99 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.08 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.69 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.03 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.05 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.21 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 23.43 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 25.13 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.89 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 22.78 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.15 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.94 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPS21.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.70 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.21 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.80 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.08 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.86 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.30 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 23.47 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 25.27 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.70 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.50 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 28.40 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.98 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TFB2M.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.87 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.54 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.91 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 24.84 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.54 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.54 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.43 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 27.68 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.50 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 28.98 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.40 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.67 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NPM1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.63 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.68 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.82 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.47 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.45 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.46 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 30.47 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 30.39 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 30.45 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 30.53 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 30.51 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:12<00:04, 30.38 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/BANF1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.87 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.41 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 22.45 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.25 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.73 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.89 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.52 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.17 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.53 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.80 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 26.87 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 25.06 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TUBGCP2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.72 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.17 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.08 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.15 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 24.81 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.33 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.61 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.56 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.02 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.42 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.73 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.93 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/COPE.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.72 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.18 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.17 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.43 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.47 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.20 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.76 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.18 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.47 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.71 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.80 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 26.75 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/REV3L.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.75 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.61 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.84 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.58 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.52 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.43 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 30.37 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 30.53 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 30.43 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 30.44 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 30.31 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:12<00:04, 30.35 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TPRKB.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.95 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.33 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.34 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 23.06 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.09 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.48 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.76 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.42 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.96 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.29 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.56 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.75 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/COASY.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.63 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.36 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.05 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.30 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.56 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 23.85 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.48 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 27.02 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.85 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.66 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.18 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 25.63 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TADA3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.77 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.40 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.02 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 24.91 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.62 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.74 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.55 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.01 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.45 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.78 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.92 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.97 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SNRNP40.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.69 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.89 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:14, 27.92 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 29.15 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 29.67 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:11, 26.23 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:11, 24.32 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:10, 23.25 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 23.00 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 24.77 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 26.23 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 27.43 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MED8.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.75 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.52 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.65 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.30 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.14 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.05 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 29.96 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 30.04 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 30.04 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 30.14 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 28.55 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:12<00:04, 28.96 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TIMM10.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.93 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:18, 23.54 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 22.83 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.31 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.91 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.55 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.45 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.37 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:10, 21.67 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.44 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.78 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.75 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DNM1L.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.06 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.07 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.84 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.96 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.92 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.04 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.84 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.24 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.59 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 22.02 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.73 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.48 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/VARS.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.79 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.27 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.85 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.00 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.05 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.30 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 23.60 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 25.39 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.11 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 26.67 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.51 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.40 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CENPK.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.87 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.41 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.02 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.18 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.70 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.10 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.67 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 27.08 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.80 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.50 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.05 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.11 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/METTL14.dataset


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.02 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.47 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.99 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:16, 21.93 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.24 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 26.07 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.32 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.31 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.87 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.20 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 26.54 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 24.42 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PAM16.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.79 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.13 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.72 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.43 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.35 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.46 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.32 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 27.99 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.69 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.13 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.29 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.49 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PFDN4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.76 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.77 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.92 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.42 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:12, 26.33 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 24.25 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:12, 23.07 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 22.78 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 24.67 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 23.38 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.65 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 22.64 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TMEM127.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.02 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.45 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.38 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.10 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.71 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.46 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.97 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.20 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.35 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.34 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 25.84 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.01 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ATR.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.84 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.42 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.09 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.32 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.82 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.34 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.52 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 25.38 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.67 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.60 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 28.50 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.98 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RBM48.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.76 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.30 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.84 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.94 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.91 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.69 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.59 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.66 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.25 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 26.45 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.52 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.26 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DCUN1D5.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.68 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.38 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.50 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.43 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.44 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.75 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.57 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.84 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.87 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.55 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.96 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.44 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/POLE.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.38 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:23, 18.50 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 19.79 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.06 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.59 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.36 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.72 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.81 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.53 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.90 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 26.17 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 24.30 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GBF1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.10 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.57 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.16 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.31 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.27 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.41 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.50 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.04 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 26.53 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 23.37 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.55 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 22.87 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/POLR2B.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.00 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.45 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.48 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.46 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.21 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.40 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.21 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.83 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.35 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.52 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.79 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.88 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/C19orf25.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 16.95 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.45 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.46 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.45 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.97 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.69 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.02 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.95 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.57 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.09 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.49 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.70 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/WBP1L.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.99 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.42 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.07 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.60 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.11 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.29 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.66 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.51 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.61 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.21 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.54 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.56 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NDUFS5.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.70 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.47 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.25 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.48 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 24.50 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.92 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.28 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.81 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.40 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.29 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 25.78 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.04 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPL36.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.72 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.33 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.17 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.39 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.61 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.28 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.78 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.09 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.42 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.61 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.69 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.87 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MRPS26.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.48 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.07 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.09 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 29.91 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:12, 28.41 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 28.94 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 29.33 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 29.60 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 29.71 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 29.95 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 30.14 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 26.51 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RBM8A.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.69 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.27 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.07 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.14 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.83 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.14 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.04 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.63 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.05 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.41 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.45 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.26 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPL6.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.94 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.45 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.09 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.30 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.61 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 23.98 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.76 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 27.07 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.02 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.76 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.39 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.62 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPS26.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.55 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.48 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 19.80 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 20.42 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.08 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.94 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.76 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 22.69 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.07 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 22.99 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 24.76 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.23 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/EIF2B2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.58 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.33 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.97 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.23 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.65 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 23.85 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.49 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.84 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.78 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 27.22 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 27.95 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 25.52 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SLC39A9.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.78 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:18, 23.44 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:15, 26.42 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:13, 28.27 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 29.03 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 29.48 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 29.86 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 30.02 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 30.23 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 30.31 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 30.34 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 30.40 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CCP110.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 16.74 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.44 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.35 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.25 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.72 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.54 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.32 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.30 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.58 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.52 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.09 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.25 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/YBX1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.97 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.29 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.96 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.13 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.12 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.00 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.97 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.51 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 26.42 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 27.57 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 28.33 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.95 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PSMC2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.53 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.22 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.43 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.40 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.15 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.38 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.30 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.90 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 26.04 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 24.33 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.78 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 22.40 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PSMD2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:02<00:29, 15.68 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 20.79 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 25.33 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 26.58 examples/s][A
Map (num_proc=16):  81%|████████▏ | 407/500 [00:15<00:03, 27.60 examples/s][A
Map (num_proc=16):  88%|████████▊ | 438/500 [00:16<00:02, 28.40 examples/s][A
Map (num_proc=16):  94%|█████████▍| 469/500 [00:17<00:01, 28.99 examples/s][A
Map (num_proc=16): 100%|██████████| 500/500 [00:18<00:00, 26.39 examples/s][A

Saving the dataset (0/1 shards):   0%|          | 0/500 [00:00<?, ? examples/s][A
Saving the dataset (1/1 shards): 100%|██████████| 500/500 [00:00<00:00, 47260.83 examples/s][A
  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PELP1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.63 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.30 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.00 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.11 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.02 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.99 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.20 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.35 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.19 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 26.53 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.48 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.39 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/WDR3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.77 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.54 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.91 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.18 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.04 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.07 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.01 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 20.97 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.23 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.31 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 21.20 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.74 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RFT1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.87 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.39 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.43 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.52 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.33 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.77 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.69 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.41 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 25.93 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 24.25 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.05 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.49 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GTF3C1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.71 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.33 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.84 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.13 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.40 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.76 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.47 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.90 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.90 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.51 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 27.57 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.50 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/HSPA9.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.92 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 20.84 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.30 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.37 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.70 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 25.03 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.52 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.12 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 24.91 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 26.34 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 27.50 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.33 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TCOF1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.59 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.25 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.11 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.05 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.78 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.93 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 25.00 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.50 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.58 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.65 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.89 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.42 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/HYOU1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.81 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:18, 23.30 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:15, 26.24 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:13, 27.82 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 28.21 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.86 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 27.91 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.52 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.85 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.23 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.47 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.54 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/UTP18.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.93 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.16 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.85 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.16 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.79 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.60 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.78 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.88 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.65 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.06 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.47 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.76 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DNAJA1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.77 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.18 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.90 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.75 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.40 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.34 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 23.23 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 22.57 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.28 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.22 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 25.72 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.83 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/WDR74.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.70 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.63 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:14, 28.38 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:15, 24.78 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:14, 23.25 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:13, 23.19 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:11, 25.03 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.81 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 22.82 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.62 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.19 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.84 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/WDR43.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.74 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.33 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.19 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.57 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.35 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.08 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.03 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.21 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 23.37 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 25.00 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 25.29 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 26.57 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PPP1R15B.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.77 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.23 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.41 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.36 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.14 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.22 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.27 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.87 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.29 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.59 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.63 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.77 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RRS1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:28, 16.67 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.09 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.44 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.57 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.54 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.75 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.68 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.41 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.77 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.28 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.88 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.56 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RBMX.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.07 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.46 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 23.91 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.29 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.52 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.43 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.89 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 27.80 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.48 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.10 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 27.79 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 25.29 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/AQR.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.64 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.42 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.07 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.43 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.63 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.20 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:11, 25.24 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.78 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 23.27 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 24.95 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 26.32 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 27.31 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CLASRP.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.73 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.28 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.83 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.19 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.15 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.71 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 23.86 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 25.43 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.69 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.60 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 28.49 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 29.00 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NAA38.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.96 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.36 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.88 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.12 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.31 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.70 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.40 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 22.97 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.46 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 22.30 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 24.34 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 25.79 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NAA15.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.67 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.01 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.55 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.67 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.83 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.52 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 23.78 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 25.46 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.55 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.80 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.79 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.98 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DHX37.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.96 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.45 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.60 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 29.80 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 29.88 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 29.82 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 29.73 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:09, 26.36 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:08, 24.27 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:08, 23.01 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 23.23 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 24.85 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PSMB4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.71 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.20 examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.57 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.45 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.01 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 24.86 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.56 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.67 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.50 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.09 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.30 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 28.34 ex

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CFDP1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.71 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.20 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.86 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.74 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.47 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.55 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.98 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 24.34 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 25.92 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 26.90 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 27.93 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.59 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPS16.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 16.86 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.37 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.36 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.61 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.38 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.31 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.24 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.20 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.47 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 23.40 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 25.19 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 26.60 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MBTPS1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 26.00 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.69 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.82 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.11 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.04 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.36 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 28.69 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 29.21 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 29.44 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 29.74 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 29.90 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 26.41 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ACTL6A.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.00 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.34 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.75 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.09 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.63 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.41 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.75 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 24.56 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.27 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.54 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.18 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 24.13 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ERAL1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.92 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.39 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.19 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.07 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.67 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.72 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.58 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.96 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.34 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.60 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.63 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.75 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RAC1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.04 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.41 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 22.45 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.22 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.88 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.58 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.65 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.45 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.94 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.08 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.43 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.68 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPS14.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.96 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.22 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.94 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.93 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.72 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.08 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.08 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.32 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 23.83 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 23.14 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.92 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.34 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/STRAP.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.67 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.23 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 23.95 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.16 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.39 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 28.01 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.63 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.06 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.34 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.50 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.66 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.76 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MRPL37.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.70 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.10 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 19.42 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 20.01 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.39 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.80 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 24.68 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 26.09 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:07, 27.14 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.88 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 28.52 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.86 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CINP.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.54 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.48 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.28 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.59 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.79 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 24.95 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.59 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 22.79 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.06 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 21.72 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 20.49 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 20.67 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/UBE2H.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.00 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.44 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.15 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.26 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.98 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.05 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 23.07 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.53 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.39 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 23.25 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.63 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.94 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PTPN23.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 16.94 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.44 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.84 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.98 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.98 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.99 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.97 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 20.86 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.89 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:15<00:08, 21.11 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:06, 23.33 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 22.75 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ATP6V1F.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.81 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.27 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.78 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.09 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.54 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.72 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.51 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.13 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.34 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.60 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.72 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.70 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPN2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.81 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.43 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.77 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.08 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.27 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.09 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.18 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.35 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.95 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 26.34 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.39 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.21 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPL27A.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.79 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.08 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.50 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.84 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.84 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.64 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 23.84 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 25.48 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.71 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.80 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.50 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.62 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PHB.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.77 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 20.81 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.55 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.72 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.83 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.54 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.94 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 27.79 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.43 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 28.96 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.33 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.54 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPS13.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.83 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.27 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.84 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.82 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.94 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.74 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.28 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.18 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.30 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.95 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.21 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 24.12 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/FAF2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.58 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:18, 23.35 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 22.54 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 23.09 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.16 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.66 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.68 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.45 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 25.74 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 24.29 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 25.79 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 24.02 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PSMA5.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.79 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.31 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.68 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.42 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.26 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.52 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.08 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.27 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.44 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.12 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.47 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.49 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PIAS4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.68 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.10 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.04 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.05 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.86 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.18 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 27.94 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.59 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.08 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.41 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 26.41 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:05, 24.50 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GON4L.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.08 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.29 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 19.73 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 19.90 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:08<00:17, 19.86 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:15, 19.87 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:11<00:14, 19.84 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:12, 19.78 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:14<00:11, 19.72 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:15<00:09, 19.62 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:17<00:07, 19.57 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:19<00:06, 19.50 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CENPN.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:26, 17.66 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.06 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:05<00:22, 18.07 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:19, 19.18 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.98 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.28 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.63 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.55 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.27 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.36 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.69 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 27.94 e

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/COX5A.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.68 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.51 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.31 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.57 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.76 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.52 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.95 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.38 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.57 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.74 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.75 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.90 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NXF1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.92 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.41 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.75 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 23.07 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.10 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.52 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.66 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.44 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.31 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.87 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.02 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 24.62 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PGS1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.85 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.21 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.61 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.89 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.86 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.66 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 22.84 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 24.70 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.05 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.15 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.90 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.51 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GNL2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.87 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.28 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.99 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.04 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.88 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.17 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 23.04 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.16 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 21.73 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 22.13 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.99 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 25.66 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MTG2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.00 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.36 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.00 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.13 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.95 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.66 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.49 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.51 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.64 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.37 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.76 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.75 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DDX1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.01 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.20 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.95 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.92 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.12 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.54 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.75 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.51 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 23.75 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.71 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:07, 22.07 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.71 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NSMCE1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.38 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.46 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.65 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.03 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.06 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.14 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 30.03 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 29.98 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 30.09 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 30.04 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 28.75 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 25.76 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/FDXR.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.84 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.35 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.79 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.08 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.26 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.68 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.47 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.74 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.89 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 23.66 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.50 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 25.28 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPL5.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.00 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 20.90 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.68 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.87 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.88 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.64 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 29.05 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.12 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.38 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.42 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.88 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.99 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SMARCE1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.85 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.13 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.71 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.99 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.05 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.96 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 23.21 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 25.08 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.30 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.55 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 28.27 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.66 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/WDTC1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.69 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.27 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.31 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 23.20 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 22.28 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.96 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.53 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.32 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.01 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.06 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 25.69 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.96 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/HBS1L.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.75 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 21.01 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.65 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.59 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.59 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.30 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 25.65 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.95 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 22.73 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 23.13 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.85 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.07 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ARMC6.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.52 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.03 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.06 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.98 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.03 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.55 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.57 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.27 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.90 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.28 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.36 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 26.39 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MCM3AP.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.51 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.06 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.62 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 20.66 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.77 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.01 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.80 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.20 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.31 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 23.08 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.51 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.13 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MRPL51.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.71 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.36 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.84 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.17 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.04 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.56 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 23.76 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 25.52 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.81 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.77 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 28.53 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 29.00 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/YKT6.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.03 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.78 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 23.54 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.06 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.44 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.38 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.90 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.51 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.80 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.76 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.98 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 30.11 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPL30.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.52 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 19.93 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.37 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.36 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.94 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.23 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.13 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.74 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 27.80 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.39 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 25.80 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:05, 24.19 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/COX10.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.93 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 20.98 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.70 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.89 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.96 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.67 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 29.12 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:09, 26.33 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 24.39 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 23.73 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.88 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 22.23 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/COX17.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.76 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.35 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.54 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.29 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.34 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.36 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 30.14 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 30.18 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 30.14 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 29.92 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 30.06 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:12<00:04, 29.99 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/WDR61.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.71 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.33 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.81 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.44 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.89 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.55 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.14 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 27.27 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 25.34 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 23.69 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 25.27 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.37 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GLI4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.86 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.52 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 22.62 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.28 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.99 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.97 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.75 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.29 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.69 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.64 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.74 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.92 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SRBD1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.73 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.35 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.84 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.14 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 22.32 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.53 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.13 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 27.39 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.22 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.94 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.27 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.58 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TUBGCP6.dataset
Creating dataset.



Map (num_proc=16):  94%|█████████▍| 469/500 [00:19<00:01, 26.52 examples/s][A
Map (num_proc=16): 100%|██████████| 500/500 [00:20<00:00, 24.53 examples/s][A

Saving the dataset (0/1 shards):   0%|          | 0/500 [00:00<?, ? examples/s][A
Saving the dataset (1/1 shards): 100%|██████████| 500/500 [00:00<00:00, 54184.37 examples/s][A
  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/HSD17B12.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.74 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.07 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.14 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.98 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.69 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.40 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.26 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 22.60 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.50 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.93 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.35 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.43 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ALG14.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.80 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.50 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.65 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.55 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.45 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.65 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.48 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.13 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.55 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 26.53 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 24.67 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.48 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ARGLU1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.69 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.11 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.23 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.17 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.99 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.11 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.69 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.07 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.96 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.65 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.50 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.73 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/LARS2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.78 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.51 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 30.06 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.44 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.32 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 28.79 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 29.39 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 29.78 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 30.04 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 30.12 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 30.20 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 27.10 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPN1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.98 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.49 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.30 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.79 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.54 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 22.03 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.13 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.80 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 26.98 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 27.88 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.61 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.90 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/FIP1L1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.98 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.26 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.03 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.17 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.18 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.13 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.40 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.58 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.84 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 22.32 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.32 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 24.31 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PREB.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.97 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.41 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.00 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.54 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.68 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.73 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 22.05 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.36 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.27 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.79 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.97 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.89 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPP30.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.83 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.53 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.37 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 23.07 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 22.23 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.21 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.28 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.88 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 26.99 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 27.97 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.64 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 27.88 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DDX41.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.49 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.50 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.61 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.30 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.12 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.09 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:10, 26.66 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:10, 24.53 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:09, 23.25 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 23.68 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 22.97 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 22.32 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PSMD11.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.78 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 18.99 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.02 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.15 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.96 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 23.92 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.93 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.24 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.69 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.62 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.07 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.19 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MED27.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.01 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.35 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.91 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.47 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.93 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.77 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.15 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.98 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 24.60 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 23.31 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.63 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 24.53 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MRPL39.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.92 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.50 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.12 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.13 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:13, 24.52 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 26.11 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.47 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.43 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 23.90 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 23.33 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 25.13 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.54 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TBL3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.77 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.34 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 22.49 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.25 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:14, 23.98 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.05 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 21.93 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.46 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:10, 21.33 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.26 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.72 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.73 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PRPF4B.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.06 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.50 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.63 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.76 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.28 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.38 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 25.26 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.39 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 26.98 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.00 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.57 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.06 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TRAPPC5.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.73 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.29 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.58 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.55 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.27 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.05 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.96 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.28 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.35 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.27 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 25.80 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.85 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GSPT1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.64 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.27 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.98 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.30 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.99 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.95 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.96 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.10 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.84 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 26.19 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.11 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 25.06 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GLRX5.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.79 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:18, 23.18 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:15, 26.17 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:13, 27.84 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:11, 28.59 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 29.09 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 29.35 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.14 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.74 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 28.90 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.28 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.48 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SNAPC5.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.66 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.35 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.36 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.31 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.02 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.28 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.13 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.68 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.07 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.36 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.52 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.69 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SSBP1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.74 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.30 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.95 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.24 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.17 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.74 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.98 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 25.70 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.67 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.61 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 28.43 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.92 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/FXN.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.93 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.47 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.92 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.10 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.69 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.80 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.91 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.49 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.57 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.40 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.76 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 27.72 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/HNRNPM.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.75 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.43 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.39 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.44 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.37 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.66 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.85 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 24.79 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 26.29 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 27.43 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.14 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.52 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GINS4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.88 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.29 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.51 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.42 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.24 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.48 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.27 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.87 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.39 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 26.22 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 24.10 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.18 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/UQCRC2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:26, 17.58 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.34 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.36 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.39 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.12 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.52 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 22.85 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 24.82 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.25 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.24 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 28.13 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.74 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/EEF1B2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:17, 26.18 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.72 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.88 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.60 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.42 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.35 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 30.35 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 30.42 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 30.47 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 30.56 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 27.18 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 25.27 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/EIF4G1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.42 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.06 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.47 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.02 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 29.93 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.10 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 30.12 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 29.83 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 29.92 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 29.93 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 29.98 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 26.50 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NAPA.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.72 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.35 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.06 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.32 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.40 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.49 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.88 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.88 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 25.56 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.10 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 25.68 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.92 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NOLC1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.09 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.54 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.02 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.25 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.23 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.22 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.94 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 24.09 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.73 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.13 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 28.11 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 24.76 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CTCF.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.07 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.53 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.05 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.25 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.07 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.29 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.19 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.80 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.25 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.61 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.94 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.95 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PKMYT1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.04 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.40 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.14 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.09 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.62 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.78 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.52 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.90 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 24.23 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 24.37 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 25.84 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 24.42 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TERF1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.83 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.24 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.77 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.58 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.32 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.54 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.36 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.96 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 24.26 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 23.30 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:07, 21.83 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.47 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SNRNP35.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.73 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.06 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.02 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.13 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.10 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.27 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.18 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.85 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.23 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.58 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.88 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 30.06 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/H2AFX.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 16.80 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.23 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 23.27 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.73 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.00 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.87 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.52 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.01 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.24 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.46 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.63 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.72 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CMTR1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.40 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.12 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 23.92 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.35 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:14, 23.69 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.61 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.02 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.62 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:10, 20.76 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 22.94 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 24.70 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 26.00 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SBNO1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.90 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.37 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 23.89 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.26 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.71 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.45 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 29.21 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.46 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.82 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.89 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 26.36 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:05, 24.56 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DHPS.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.74 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 19.97 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.72 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.60 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.00 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.72 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.99 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.89 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.60 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.72 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.12 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.42 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPS18.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.58 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.31 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.22 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 29.86 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 29.92 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 28.35 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 28.77 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 28.94 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 29.21 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 29.48 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 29.49 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:12<00:04, 29.38 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PPP1R7.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.01 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.43 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.41 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.42 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.33 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.47 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.38 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.94 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.46 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.72 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.92 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 26.81 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GLMN.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.04 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.37 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.51 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.44 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.19 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.24 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 23.18 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.43 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.19 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.02 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 25.64 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.92 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TSR1.dataset
Creating dataset.



Map (num_proc=16):  81%|████████▏ | 407/500 [00:16<00:03, 24.80 examples/s][A
Map (num_proc=16):  88%|████████▊ | 438/500 [00:18<00:02, 23.60 examples/s][A
Map (num_proc=16):  94%|█████████▍| 469/500 [00:19<00:01, 22.80 examples/s][A
Map (num_proc=16): 100%|██████████| 500/500 [00:21<00:00, 23.52 examples/s][A

Saving the dataset (0/1 shards):   0%|          | 0/500 [00:00<?, ? examples/s][A
Saving the dataset (1/1 shards): 100%|██████████| 500/500 [00:00<00:00, 53778.64 examples/s][A
  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/POLR3H.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.97 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 21.58 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 25.18 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 27.19 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 28.25 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.93 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 27.99 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.74 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.32 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.56 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.86 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 30.10 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CDK2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.62 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.13 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.87 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.73 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.13 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 25.95 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.21 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.15 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.76 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.15 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 26.35 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 24.39 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TEN1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.98 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.36 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.00 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.23 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.12 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.11 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.14 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.09 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 21.78 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 23.85 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 25.39 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 26.73 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PSMD13.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.93 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.89 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.46 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.49 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.35 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 30.36 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 30.33 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 28.97 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 29.43 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 29.69 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 26.14 examples/s][A
Map (num_proc=16):  81%|████████▏ | 407/500 [00:14<00:03, 24.42

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/VPS35.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.92 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.25 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.34 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.38 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.23 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.40 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.26 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.88 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.39 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.60 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.73 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.91 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/EMC4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.03 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.29 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.93 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.47 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.86 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.69 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.93 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.75 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 24.08 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 23.60 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 25.30 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.46 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SYS1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.69 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.18 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.44 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.36 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.73 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.43 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.61 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.83 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.79 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 25.64 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.92 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.28 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CDC5L.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.82 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.56 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.63 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.22 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.20 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.21 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 30.23 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 30.19 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 30.14 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 30.26 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 30.16 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:12<00:04, 30.15 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/HOXC10.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.65 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.38 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.87 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:16, 22.15 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:13, 24.43 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 26.15 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.41 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.26 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.96 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.30 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.55 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.85 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ACTR1A.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.80 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.03 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 19.88 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 20.56 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.98 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.38 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 25.25 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.68 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.71 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.47 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.01 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.31 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/WDR55.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.93 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.27 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.11 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.28 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 24.55 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.71 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.82 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.23 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 21.94 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.58 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.36 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 25.15 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ATP6V1A.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.94 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.86 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.94 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.54 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.38 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.41 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 30.42 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 30.18 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 30.24 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 30.39 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 30.42 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:12<00:04, 30.43 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PMPCA.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.21 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.47 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.65 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.19 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.20 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.11 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 30.25 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 30.12 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 30.16 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 30.19 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 30.33 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:12<00:04, 30.29 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ARL4D.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.62 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.09 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.70 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.70 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.25 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:15, 20.03 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.14 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:12, 20.47 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.71 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:15<00:08, 20.73 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 20.79 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:18<00:05, 20.70 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MIS18BP1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.74 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.23 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.98 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.46 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.34 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.04 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.03 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.03 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.90 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:15<00:08, 20.96 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 20.95 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 20.94 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TRAIP.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.83 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.04 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.72 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.02 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.26 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.54 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 25.36 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.73 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.73 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.51 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.90 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.25 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SPTSSA.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.73 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.36 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.10 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.24 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 24.86 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.44 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.60 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.40 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.05 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.25 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.63 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.74 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PSMD14.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.76 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.30 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.88 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.03 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.99 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.98 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 23.32 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 25.22 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.49 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.46 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 25.42 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.85 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/WDR26.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.83 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.45 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.02 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:16, 22.06 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:13, 24.45 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 26.11 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.42 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.24 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.97 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 26.29 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.28 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.56 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RACGAP1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.87 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.53 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.40 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.81 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.60 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.62 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.47 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.95 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.54 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.61 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 30.00 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 30.08 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ARPC2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.95 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 21.53 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 25.08 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.94 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.91 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.70 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 29.34 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.57 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.82 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 30.05 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 30.06 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 28.70 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPL37A.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.62 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.10 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.66 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.78 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.67 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.92 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.83 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.42 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.89 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.20 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.36 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.42 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NUP35.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.19 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 20.82 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 23.30 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.78 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.15 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 28.00 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.57 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.06 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.34 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.62 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.82 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 26.65 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TP53RK.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.70 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.37 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.24 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.55 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.67 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.54 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 29.13 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.56 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.73 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 28.35 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.05 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.30 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PPP2R3C.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.00 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.44 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.05 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.32 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.10 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.12 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.20 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.19 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.11 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.11 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 21.14 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.66 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/BET1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 16.89 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.46 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.57 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.95 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.93 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.61 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 23.75 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 25.50 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.70 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.59 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 28.27 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.83 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/WDR92.dataset
Creating dataset.



Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.20 examples/s][A
Map (num_proc=16):  81%|████████▏ | 407/500 [00:17<00:03, 24.07 examples/s][A
Map (num_proc=16):  88%|████████▊ | 438/500 [00:18<00:02, 25.56 examples/s][A
Map (num_proc=16):  94%|█████████▍| 469/500 [00:19<00:01, 26.68 examples/s][A
Map (num_proc=16): 100%|██████████| 500/500 [00:20<00:00, 24.29 examples/s][A

Saving the dataset (0/1 shards):   0%|          | 0/500 [00:00<?, ? examples/s][A
Saving the dataset (1/1 shards): 100%|██████████| 500/500 [00:00<00:00, 54262.89 examples/s][A
  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/HAUS3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.73 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.24 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.31 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.23 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.02 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.12 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 27.94 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.56 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.06 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.33 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.61 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 26.62 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DDX56.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.72 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.14 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.72 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.38 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.73 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.66 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.98 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.25 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 24.07 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.63 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 24.11 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.87 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SERBP1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.73 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 20.84 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.45 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.65 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.33 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.40 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:11, 24.72 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.25 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 22.52 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 21.88 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.92 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 24.59 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/HSPE1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.77 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.61 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.56 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.57 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.93 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.10 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.25 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.51 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 21.91 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.83 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.58 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:06, 20.64 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PSMA1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.67 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.23 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.33 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.10 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.35 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.20 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.21 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.44 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.19 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 26.47 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.42 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.21 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/WTAP.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.02 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.39 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.22 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.44 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.46 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.86 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.87 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.61 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.04 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.25 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.60 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.75 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/EXOC8.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.88 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.28 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.69 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.99 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.23 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.57 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 25.32 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.78 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.76 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.48 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.16 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 24.90 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CHMP4B.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.85 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.22 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.88 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.94 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.93 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.72 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.89 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 20.86 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.85 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:15<00:08, 21.50 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 21.53 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.45 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/BYSL.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.43 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:18, 23.25 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.41 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.17 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:14, 24.27 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 25.88 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.05 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 27.77 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.43 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 28.92 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.32 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.41 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/INO80B.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.94 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.58 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 22.56 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.37 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.95 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 25.19 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.53 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:11, 22.45 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 24.56 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 23.39 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.69 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.18 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TOP1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.02 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.53 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.19 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.26 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.20 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.18 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.22 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.16 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.26 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 23.39 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 25.12 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.87 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SMC3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.84 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.12 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.89 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.61 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 24.79 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.28 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.55 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.28 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.79 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.23 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.47 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.65 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MED10.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.55 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.64 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.80 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 28.73 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 29.04 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 29.31 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 29.64 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 29.80 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 30.02 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 30.13 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 30.13 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:12<00:04, 30.05 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PGPEP1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.92 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.10 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.88 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.63 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.01 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.79 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.97 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.12 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.75 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.18 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.10 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.71 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/METTL3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.01 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.49 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.09 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.43 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.80 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.59 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.99 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.98 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 25.10 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 23.65 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.08 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 24.88 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TOR2A.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 16.80 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.50 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 23.59 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.00 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.39 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.19 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 25.04 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.66 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 25.43 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 26.68 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 27.70 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 25.39 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DICER1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.14 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.15 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.52 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.14 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.95 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.25 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 23.43 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 24.17 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.82 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.03 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.89 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.67 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RARS.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.86 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.53 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.81 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.19 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.06 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.06 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.46 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.65 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.49 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 26.75 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.72 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.38 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/EIF3F.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.03 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 21.34 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 25.02 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 27.15 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 28.15 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 25.04 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.46 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 22.74 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 24.71 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 23.77 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.85 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 22.31 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CIAPIN1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.91 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.27 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.40 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.86 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.91 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.13 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.04 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.57 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.32 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.54 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.77 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.82 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PEX1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.84 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.49 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.35 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.66 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.85 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.56 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 29.05 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.40 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.71 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 26.63 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 23.60 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:05, 22.60 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/UPF3A.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.75 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 21.03 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.72 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.91 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 28.00 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 25.58 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.89 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.18 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 24.83 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 26.16 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 27.28 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 27.88 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/COPZ1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.02 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:23, 18.76 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 19.74 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 20.43 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.65 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.10 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.58 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.99 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.63 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.27 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 27.25 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.04 e

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TLCD1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.81 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.80 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.96 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.56 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.34 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.35 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:10, 27.01 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:09, 24.99 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:09, 23.53 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:08, 23.24 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 25.06 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 26.51 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PRPF38A.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.00 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 20.96 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.83 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 27.05 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 28.09 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.85 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 29.26 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.66 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.82 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.73 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 30.13 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 30.24 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/KIF14.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.86 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.28 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.00 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 24.82 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.48 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.87 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.17 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.12 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 23.65 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 23.22 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.98 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.43 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ACTR3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.86 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.39 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.26 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.23 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.94 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.23 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.14 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.82 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 26.17 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 23.86 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.90 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 22.22 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MRPL45.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.99 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.26 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.94 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.21 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.17 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.12 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 23.39 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 25.19 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.94 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 22.77 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.05 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 24.77 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PRC1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.86 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.33 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.11 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.32 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.25 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.78 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.75 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.34 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.45 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.25 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.80 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.17 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CSTF1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.94 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 19.83 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.69 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.88 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.17 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 25.88 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.11 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.99 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.64 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.07 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.37 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 25.28 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/EXOSC9.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.85 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.29 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.77 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.49 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.34 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.55 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.82 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.52 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:10, 21.42 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 23.33 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 25.02 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 26.40 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SNRPC.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.73 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.27 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 19.68 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.82 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.40 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.79 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 22.15 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.87 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.57 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.25 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 21.34 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 23.39 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NELFE.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.67 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 19.96 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.08 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 24.96 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.54 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.49 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.27 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.79 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.15 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 28.17 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 28.71 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 26.06 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GTF3C6.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.77 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.20 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.21 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.58 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.36 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.24 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.46 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.65 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.41 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 26.74 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.76 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.34 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SLC25A10.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 16.84 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.34 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.61 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.03 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.77 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.99 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 22.01 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.83 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.60 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.36 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 21.86 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 23.97 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RCL1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.94 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.38 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.05 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.16 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.04 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.27 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.14 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.77 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 27.87 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.53 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.84 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.19 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MNAT1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.12 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.45 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 23.27 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.92 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.25 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 28.12 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.81 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.22 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.56 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.83 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 30.01 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 30.03 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/YEATS2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.80 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.51 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.19 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.07 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.72 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 24.92 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.59 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.99 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.04 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.41 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.88 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 24.66 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SET.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.71 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.12 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.85 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:16, 22.00 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.27 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 22.00 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.51 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.25 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.31 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 23.35 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 25.07 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 26.39 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/IPO13.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.83 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.73 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.42 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.18 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 24.51 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.17 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.36 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 24.95 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 23.71 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.82 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.86 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 24.75 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/WDR83OS.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.97 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.51 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.13 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.39 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.21 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.11 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.61 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.78 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.47 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 26.87 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.65 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.63 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GNB1L.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.83 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.62 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 23.68 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.18 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.58 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.43 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 29.03 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.39 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.62 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.82 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 30.04 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 30.02 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DDX6.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.82 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.21 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.87 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.72 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.07 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.76 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.99 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.64 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.55 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.35 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.87 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.23 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PFDN6.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.77 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.59 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.80 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.30 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.29 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:11, 26.28 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:11, 24.30 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:09, 25.95 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:08, 24.41 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:08, 23.23 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 23.51 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 25.17 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SURF6.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.01 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.53 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 22.59 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.57 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 22.00 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.87 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.39 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.29 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.25 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 22.22 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 22.12 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.81 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RANBP2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.93 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.39 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.93 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 24.91 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.61 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.67 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.44 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.91 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.31 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.70 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 28.73 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.17 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SNRNP70.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.06 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 21.75 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:15, 25.44 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 27.49 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 28.36 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 29.06 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 29.44 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:09, 26.57 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 24.48 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 23.38 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 22.58 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 22.61 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ACTR2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:20, 22.66 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:16, 26.87 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:14, 28.66 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 29.44 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 29.77 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.37 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:11, 24.40 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:10, 23.50 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:08, 25.44 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 26.63 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 27.59 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.29 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TMX2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.09 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.53 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.43 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.37 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.21 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.16 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.79 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.25 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 21.75 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 23.80 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 25.44 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.75 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TUBGCP3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.93 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.41 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.87 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.56 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.87 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.46 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.79 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 24.86 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.46 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.65 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.49 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 24.34 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CDC27.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.69 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.13 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.35 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.27 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.08 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.33 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.87 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.35 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.17 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 22.10 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.83 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.57 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPAP3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.59 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.64 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.68 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.29 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.26 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.26 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 30.20 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 28.73 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 29.20 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 29.51 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 29.65 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:12<00:04, 29.90 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/USP8.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.21 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.10 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.90 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.82 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.14 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 25.89 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.12 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.11 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.43 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 21.98 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.59 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.33 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/EIF2S1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.73 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.28 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.22 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.39 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.02 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.24 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.11 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.83 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 24.08 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 23.43 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 25.01 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 25.74 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RBM17.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.93 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.38 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.15 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.54 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.70 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 24.91 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.24 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:11, 21.99 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 23.99 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 25.66 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.78 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.54 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RUVBL2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.64 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.33 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:15, 25.41 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.88 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:14, 22.94 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 24.81 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 26.34 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:09, 27.41 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 25.21 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 23.96 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.81 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 22.24 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ACTR1B.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.05 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.37 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.72 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.60 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.31 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.56 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.28 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.97 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.31 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.47 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.97 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 30.06 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CPSF2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.82 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.24 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.85 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.80 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.19 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.62 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.65 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.36 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.90 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 26.13 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 24.30 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.13 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TARDBP.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.68 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.24 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.41 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.45 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.29 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.48 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.33 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.93 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.31 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.76 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 30.00 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 26.78 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SF3A1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.86 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.94 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 30.04 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.64 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.59 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.50 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:10, 26.95 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:10, 24.79 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:09, 23.52 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:08, 22.61 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:07, 22.12 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 22.21 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NOC3L.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.01 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.52 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 19.92 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 20.36 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.63 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.77 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.98 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.55 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 23.70 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 25.36 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:05, 26.62 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 27.73 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PHAX.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.47 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:18, 23.45 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.44 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.07 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.62 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.66 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.82 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.51 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 26.82 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 27.28 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 27.33 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.00 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/WDR77.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.84 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 21.19 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.97 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 27.09 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 28.17 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.90 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 26.30 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 24.57 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 22.94 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.48 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.31 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 25.16 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PABPC1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.51 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 19.96 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 23.76 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.00 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.25 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 28.06 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.62 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.01 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.18 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.56 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.71 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.73 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MRPL20.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.74 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.20 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.28 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.14 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.78 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.04 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 27.93 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.56 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.85 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 27.89 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 25.36 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.86 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GARS.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.96 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.28 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.10 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.28 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.08 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.81 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.80 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.28 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.51 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.43 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 25.95 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.24 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DPM2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.47 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.11 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.70 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.95 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.01 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.41 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 25.22 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.58 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.17 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.90 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.25 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.81 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ORC1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.70 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.38 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.60 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 29.70 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:13, 26.09 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.13 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 23.16 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.07 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 26.44 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 27.67 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.43 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 25.97 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SEC62.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.68 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.11 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.05 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.12 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.01 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.21 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.12 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.66 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.15 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.51 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.79 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.86 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RNF40.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.53 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.24 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.04 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 20.24 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.14 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.36 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 25.26 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.72 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.78 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.26 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.90 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.10 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/UNC50.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.86 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.38 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.32 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.43 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.20 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.56 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.29 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.56 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.50 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.12 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.48 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 26.29 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/BUB1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.92 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.27 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.75 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.85 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.78 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.79 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.82 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 20.87 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.93 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:15<00:08, 21.12 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:06, 23.24 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:04, 24.86 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CWC15.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.50 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.49 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:14, 27.52 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 28.80 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 29.20 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.11 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:11, 24.23 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:10, 23.20 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 22.47 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 24.44 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 25.96 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 27.09 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SRPRB.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.94 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.47 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.07 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.17 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.63 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.85 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.88 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.29 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.51 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.33 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.59 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.49 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/BRK1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.67 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.27 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.91 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.60 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.90 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.65 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.15 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.12 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.64 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.22 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 26.48 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 24.55 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CUL2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:02<00:29, 15.73 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:23, 18.59 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 20.15 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:15, 23.33 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:13, 25.38 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 26.74 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.68 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.41 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.90 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.23 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.49 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.75 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MED11.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.02 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.47 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.00 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 24.83 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.65 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 24.69 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.95 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.37 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.35 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 26.07 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.25 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.12 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/BCAR1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.86 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 20.99 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.51 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.68 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.70 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.60 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 25.72 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.87 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 22.88 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.99 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.71 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.69 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/POLR2C.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.76 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.28 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.00 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.12 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 22.69 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.21 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.21 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.83 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 26.97 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 27.88 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.41 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.86 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TERF2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.57 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.33 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.25 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.44 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.64 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.35 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.41 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.18 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 25.79 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 23.97 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 22.98 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 22.26 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SUPT16H.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.93 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.37 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.53 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.42 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.16 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.47 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.38 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.87 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 25.95 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 23.90 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.99 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 22.66 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NUP88.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.81 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.26 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.16 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.15 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.97 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.39 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.13 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.87 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.31 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.66 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.66 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.85 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/BPTF.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.99 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.47 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.18 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.09 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.49 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.33 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.44 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.32 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.85 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.41 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.68 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.62 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/FNTA.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.64 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.72 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.43 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 29.88 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.00 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.10 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 29.90 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 30.02 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 30.00 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 30.11 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 30.24 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:12<00:04, 30.00 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/BRIP1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.50 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.43 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.56 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.14 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.14 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.21 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 30.11 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 30.10 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 30.14 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 30.13 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 30.13 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:12<00:04, 30.08 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SP2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.91 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.51 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.43 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.66 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.70 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.10 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 25.11 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.76 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.79 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.16 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.89 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.14 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RAB18.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.62 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.24 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.81 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.80 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.11 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 25.88 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.06 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.91 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.50 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 24.65 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.28 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 22.43 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PMPCB.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.01 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.54 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.71 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.69 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.34 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.58 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.54 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.21 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.60 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.70 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.96 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 30.10 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/KRR1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.57 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.53 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:16, 25.09 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.55 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:15, 22.56 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 24.24 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 25.91 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:09, 27.11 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 27.98 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 28.66 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 28.99 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.31 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPS3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:26, 17.78 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.37 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 19.83 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 20.08 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:08<00:17, 20.01 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:15, 19.98 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:11<00:14, 18.98 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:12, 19.22 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:14<00:10, 19.88 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:15<00:08, 22.24 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 21.51 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:18<00:05, 21.20 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ZNF468.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.90 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.45 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.67 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.68 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.34 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.97 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.99 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 20.71 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.13 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 23.26 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 24.91 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 26.33 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/XRN2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.75 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.09 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 19.58 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 20.22 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.42 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.77 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 23.09 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 24.83 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.20 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.17 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.79 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.36 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RTEL1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.88 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.06 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.81 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.32 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 22.00 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.25 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.99 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 27.27 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.18 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.87 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.00 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.51 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SEPSECS.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.05 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.18 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.56 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.67 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.52 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.46 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 23.42 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 25.23 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.38 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.49 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 28.28 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.73 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NSA2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.78 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.22 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.61 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.67 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.38 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.45 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.44 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.99 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.33 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.69 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.94 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.96 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/LSM2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.65 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.83 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.73 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.12 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.35 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 28.55 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 29.16 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 29.57 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 29.80 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 29.90 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 30.01 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:12<00:04, 30.07 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/UTP3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.76 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.44 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.14 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.39 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.95 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.24 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.98 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 27.30 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.12 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.83 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.32 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.62 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DKC1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.70 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.23 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.79 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.42 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.87 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.62 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.89 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.71 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.50 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 25.85 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.08 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 22.88 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/FARS2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.77 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.32 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:14, 27.49 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:14, 25.11 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:14, 23.41 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:13, 22.35 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.39 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 24.28 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 25.80 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 26.99 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 27.88 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.45 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TAF1D.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.93 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.39 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.13 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.35 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.67 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.00 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.77 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 27.16 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.00 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.62 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.89 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.73 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GTF2H1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.67 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.02 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.99 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.97 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.71 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.89 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.81 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.44 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.77 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.71 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.03 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.19 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CCDC86.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 17.10 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.19 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 20.02 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 20.58 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:08<00:16, 20.20 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:13, 22.74 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 24.75 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 26.22 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:07, 27.41 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 28.03 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 28.78 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 29.40 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TAF6.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.58 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.46 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.75 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.42 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.24 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.16 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:11, 25.05 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:10, 23.51 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 22.78 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:08, 22.99 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 24.79 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 26.23 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/U2AF2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.04 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.39 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.27 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.21 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.14 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.42 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.28 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.66 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 24.08 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 23.27 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.92 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.27 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TSG101.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.62 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.57 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.83 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.49 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.36 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.29 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 30.25 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 30.21 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 30.33 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 30.35 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 30.19 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:12<00:04, 30.26 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/WDR33.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.93 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.27 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.85 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.11 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.40 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.35 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.91 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.71 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.76 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.54 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.56 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 24.73 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ILF2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.80 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:18, 23.73 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 22.49 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.05 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.54 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.23 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.60 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.57 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.32 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.33 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.38 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:06, 20.17 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/BCLAF1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.74 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.10 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.21 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.78 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.77 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.88 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 23.25 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 22.64 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 22.11 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 22.63 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 24.46 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 25.87 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/POLR2F.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.94 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.71 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.94 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.72 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.44 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.35 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.24 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.39 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.47 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 23.67 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.93 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 22.45 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/AHCY.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.88 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.47 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.03 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.54 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.11 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.89 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.92 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.18 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 23.42 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 24.28 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 25.80 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 26.97 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/EMC3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.03 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.31 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.01 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.82 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.15 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 26.06 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.60 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.18 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.52 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.20 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.14 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 25.76 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TAF12.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.86 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 20.82 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.53 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.36 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 22.42 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.89 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.14 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 24.19 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.85 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.09 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.94 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.44 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/HTATSF1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.97 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.45 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.87 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.37 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.43 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.38 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 30.37 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 30.45 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 30.45 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 29.18 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 29.38 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:12<00:04, 29.65 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DHX16.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.51 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.20 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.84 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.42 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.15 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.00 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 22.16 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 24.36 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.72 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 26.95 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.97 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.66 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NUS1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.45 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.45 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 23.99 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.17 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.33 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 28.00 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.46 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.77 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.05 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.36 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 26.31 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:05, 24.41 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SNRNP48.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.71 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.49 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.58 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.55 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.39 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.22 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.57 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.41 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.05 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.49 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.50 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.76 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/IMP3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.60 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:19, 22.52 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.19 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:17, 21.77 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.39 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.22 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.00 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.05 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.94 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 20.96 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 21.01 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 23.08 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/VCP.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.83 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.60 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.24 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.57 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.87 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.58 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 25.58 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 24.00 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 23.02 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 23.86 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 25.37 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 26.98 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MSTO1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.16 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.12 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 19.64 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.78 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.40 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.40 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.84 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.91 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.67 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 25.86 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.27 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.44 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/LSM10.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.69 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.10 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.35 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 29.95 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 29.75 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:11, 26.52 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:11, 24.90 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:09, 26.33 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 27.34 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 27.94 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 27.44 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 28.38 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/AIFM1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.90 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.68 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.41 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.72 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.69 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 24.81 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.39 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.13 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 26.63 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 27.65 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.37 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.85 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/EIF4E.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.04 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.18 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.58 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.09 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.08 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.16 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 23.50 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 25.27 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.49 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.62 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 28.30 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.79 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPAP2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.85 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.39 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.31 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.23 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.03 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.35 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.26 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.93 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.10 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.04 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.67 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.13 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CCT4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.72 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.20 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.81 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.28 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.69 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.48 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.93 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.86 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.54 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.97 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.26 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.47 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PDCD6IP.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.50 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.42 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.56 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:14, 25.97 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:14, 23.85 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:13, 23.44 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:11, 25.18 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:09, 26.55 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 27.54 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 28.33 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 28.81 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.21 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/POLRMT.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.50 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.27 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.82 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.41 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.24 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 25.21 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:11, 23.84 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:10, 22.79 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 22.25 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 24.20 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 25.76 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 26.97 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PSMA6.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.92 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.46 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.30 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.75 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.98 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.80 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 29.23 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.58 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.89 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 30.09 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 30.26 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 30.20 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MAD2L1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.71 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.50 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.38 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.41 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.67 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.78 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.53 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.16 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 26.18 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 24.14 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 23.56 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 25.22 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SRP14.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.93 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.05 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.26 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:16, 21.94 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.25 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.02 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 22.40 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 22.66 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.54 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 26.05 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.17 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.00 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MRPL18.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.94 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.48 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.85 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.03 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.07 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.39 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 23.67 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 25.53 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.77 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.31 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.16 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.03 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PSMB7.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.83 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.41 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.23 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.24 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.08 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.16 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.02 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.74 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.24 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.60 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.65 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.74 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MED28.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.68 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.24 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.23 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.29 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.15 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.32 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.25 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.75 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.21 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.53 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.54 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 26.47 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/HIST1H2AE.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.09 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.44 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.18 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.14 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.06 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.53 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.61 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.48 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.07 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.49 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.78 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.81 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DDX19B.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.70 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.25 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.03 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.58 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.10 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 25.96 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.29 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.00 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.88 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.25 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 26.62 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 24.06 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NCAPH2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.83 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.05 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.77 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:16, 22.17 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:13, 24.47 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 26.20 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.55 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.23 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.34 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.18 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.26 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.40 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SEC61G.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.05 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.55 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.10 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.41 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.53 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.35 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.79 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.97 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.57 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.99 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.34 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.18 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ACD.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.84 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.21 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.13 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.14 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.97 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.21 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.17 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.84 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.28 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.52 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.76 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 26.46 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CD2BP2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.73 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.69 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.37 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.59 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.77 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 25.31 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.77 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.25 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 24.95 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 23.82 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.74 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.18 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/EIF3J.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.45 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.20 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.23 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.33 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.15 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.31 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.20 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 27.58 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.31 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 28.97 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.25 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.58 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/C1D.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.53 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.12 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.69 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.74 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.81 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.92 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 23.93 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.03 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 22.14 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.77 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.74 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.79 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TXNL4B.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.73 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:18, 23.76 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.43 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.13 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.72 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.39 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.57 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.72 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.36 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 26.65 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.77 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.49 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GAPDH.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.79 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.54 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.13 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.18 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.11 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.12 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 26.86 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 27.81 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.61 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.15 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.45 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.66 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/IPO7.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.83 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.30 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.56 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.68 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.33 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.50 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.32 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.12 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 25.06 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 26.38 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 27.48 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.23 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MARS.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.89 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.32 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.72 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.07 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.05 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.96 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.33 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.57 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.26 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 26.59 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.61 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.03 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPL7.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.83 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.35 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.48 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.53 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.29 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.27 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.35 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.21 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.80 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.18 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.51 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.30 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PPRC1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.67 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.35 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.87 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.18 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.69 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.75 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 22.30 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.73 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.88 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.53 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.90 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.82 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPLP1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.85 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.25 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.88 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.54 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.57 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.41 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.34 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.80 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 21.84 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.57 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 21.46 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.30 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPS8.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.75 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.39 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.11 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 23.03 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.14 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 23.90 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.94 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.23 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 21.82 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 22.20 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 24.11 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 25.63 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DNAJC8.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.66 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.25 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.74 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.59 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.14 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.92 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.96 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.52 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.62 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.30 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 25.52 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 26.69 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/POLR2K.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.97 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.42 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.21 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.29 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.19 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.39 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.45 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.04 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.43 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.67 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 26.71 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:05, 24.65 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PTCD3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.66 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.70 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.82 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.43 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.34 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.25 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 30.31 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 30.40 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 30.43 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 30.31 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 30.32 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:12<00:04, 30.44 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/BOP1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.66 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.25 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.82 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.43 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:13, 24.55 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.04 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.40 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 24.72 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.49 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.69 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.31 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.07 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PSMA4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.96 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.29 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.89 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.39 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.77 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.46 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.80 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.81 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.57 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.97 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 25.83 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 24.15 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CCDC130.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.03 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.39 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.26 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.29 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.93 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.31 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.32 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.94 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.35 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 26.51 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 24.57 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 24.08 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SF3B5.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.84 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 20.81 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 23.27 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.85 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.21 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 28.04 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.65 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.06 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.40 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.65 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.70 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.75 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MRPS18A.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.93 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 19.98 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.67 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.38 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.88 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.69 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.05 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.07 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.75 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.47 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.26 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.01 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GUK1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.65 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.22 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.83 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.95 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.01 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.01 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.05 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.71 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 23.93 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 23.11 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.22 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 24.99 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RFC1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.03 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.08 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 23.01 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.62 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.10 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.89 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.61 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.95 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.40 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.71 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.87 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.80 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/EXOSC3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.82 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 21.47 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 25.08 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 27.32 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:11, 28.54 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 25.84 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:11, 24.29 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.77 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 25.42 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 25.75 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 27.05 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 27.97 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/KIF18A.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.81 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.79 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.97 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:14, 26.41 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:14, 24.02 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:13, 23.36 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:11, 25.10 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:09, 26.50 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 27.63 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 28.44 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 26.03 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:05, 24.31 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/COQ2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 17.18 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.61 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.40 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.89 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.96 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.88 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.01 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 20.96 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.23 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 23.36 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 25.17 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 26.56 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/COG6.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.99 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.27 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.63 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.67 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.39 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.56 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.40 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 27.62 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.45 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.02 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.54 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.71 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/WDR54.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.47 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.55 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.72 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.31 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.36 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.25 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 30.20 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 30.21 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 30.27 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 30.26 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 30.31 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 26.57 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/LSM3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.53 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.43 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.67 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.38 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.33 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.24 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 30.15 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 30.11 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 30.25 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 30.18 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 30.40 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:12<00:04, 30.33 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/C19orf53.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.91 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.46 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.33 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.40 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.25 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.29 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.10 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.21 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:10, 21.53 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.60 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.72 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.83 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TWISTNB.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.73 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.50 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.12 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.21 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.12 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.32 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.21 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.87 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.30 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.65 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.89 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.86 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DHX8.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.20 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.49 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.90 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 24.91 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.49 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.65 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.28 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.87 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.54 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 26.46 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 24.57 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.19 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/WNK1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.42 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:19, 22.88 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.21 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.77 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 24.96 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 25.28 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 26.69 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 27.74 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.48 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.02 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 26.15 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:05, 24.48 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GMPS.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.75 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.34 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.94 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.95 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.91 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.99 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.71 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.75 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.55 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 26.88 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.81 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.53 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SLU7.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.46 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.79 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.93 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.37 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.38 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.26 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 30.46 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 30.47 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 30.39 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 27.50 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:06, 25.32 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:05, 23.82 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ZNF830.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.92 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.16 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.40 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.02 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.03 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.03 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.86 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.96 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.57 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.05 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 25.16 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.60 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PGD.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.88 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 21.58 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 25.14 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 27.26 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 28.30 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 25.69 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.00 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.52 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 25.30 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 23.82 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:07, 22.03 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 22.52 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GNPNAT1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.95 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.50 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.54 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.40 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.85 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.65 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.99 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.19 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.34 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.42 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 22.13 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.74 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPL21.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:20, 22.49 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:16, 26.85 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:14, 28.59 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 29.48 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 29.73 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 29.96 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 30.02 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 29.97 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 29.99 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 30.19 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 27.11 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 25.13 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SF3B4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.89 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.34 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.19 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.53 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.08 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.09 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.04 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 20.99 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.70 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 22.90 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 24.77 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 26.21 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CCNH.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.91 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.44 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.49 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.17 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.02 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.03 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.95 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.43 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.48 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.17 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.73 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.09 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MED21.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.91 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 21.17 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.90 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.91 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.88 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.37 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.81 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.29 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.37 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.71 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.95 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 30.14 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CWC25.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.07 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.55 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.47 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.44 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.52 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.96 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.01 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.80 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.25 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 26.36 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 24.36 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.41 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DARS.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.49 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.48 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.40 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.38 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.56 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.47 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 29.03 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:09, 25.77 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 24.39 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 25.92 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 25.99 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 27.02 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NRBP1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.00 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.51 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.10 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.76 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.18 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.79 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.15 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.02 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.64 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.06 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.42 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.72 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ABHD11.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 17.01 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.51 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.53 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.30 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.14 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.17 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.58 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.76 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.41 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 26.89 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.80 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.59 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/COPB1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.95 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.14 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.77 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.01 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.96 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.53 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.34 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:12, 20.59 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.19 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 23.33 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 25.09 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 26.44 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPP21.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.88 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.32 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.33 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.29 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.10 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.25 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.21 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.75 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.20 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.41 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.59 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.72 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RNF113A.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.45 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.25 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.44 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.00 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 29.94 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 29.96 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 29.83 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 29.81 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 29.99 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 30.06 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 30.01 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:12<00:04, 30.07 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/C12orf45.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.77 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.28 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.03 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.17 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.12 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.95 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.67 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 24.70 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 26.22 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 27.16 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.17 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.76 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CDC23.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.93 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 18.96 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.69 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.87 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.75 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.18 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.08 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.84 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.37 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.62 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.89 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 30.04 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NAGLU.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.90 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.46 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.98 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 24.88 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.51 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.70 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.55 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.06 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.42 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 28.29 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 28.89 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.15 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/XRN1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.74 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.78 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:15, 25.64 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.01 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:14, 23.23 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 25.19 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 26.71 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 27.80 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.59 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.16 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.65 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.89 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/HSPA5.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.95 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.64 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.50 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.33 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.07 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.02 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.97 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 20.98 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 21.86 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 23.88 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 25.52 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 26.77 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/HEATR1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.93 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.28 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.08 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.96 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:17, 19.98 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:15, 20.35 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.51 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 20.72 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.86 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:15<00:08, 20.80 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 20.95 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:18<00:05, 21.05 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/COX7C.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.90 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.35 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.98 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.27 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.13 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.45 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 23.67 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 25.45 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.81 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.82 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 28.53 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 24.77 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/UTP15.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.71 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.28 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.02 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.46 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 24.64 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.24 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.39 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.21 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.84 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.14 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.52 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.63 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/UBQLN4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.52 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.18 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.46 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.16 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.25 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.12 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.09 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.29 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.10 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 26.46 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.51 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.18 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/USP39.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.75 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.27 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.59 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.51 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.24 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.16 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.05 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.56 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.09 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 28.05 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.66 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.92 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TFDP1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.87 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.36 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.40 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.43 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.17 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.52 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 25.35 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.90 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.89 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 23.25 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.90 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.30 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/WDR4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.00 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.47 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.79 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.75 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.49 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.16 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.63 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.51 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.78 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.08 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 22.11 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 24.13 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/EXOSC7.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.83 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.51 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.06 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.36 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 24.47 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 22.11 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 21.62 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.37 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:10, 21.44 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 23.58 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.79 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.26 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NBAS.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.87 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.29 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.94 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:16, 22.17 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:13, 24.53 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 26.13 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.16 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.07 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.45 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.01 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.99 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 25.51 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/COX15.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.69 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.33 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.24 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.22 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.21 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.83 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.82 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.34 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.27 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.27 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.85 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.31 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/VPS41.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.14 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.71 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.16 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.48 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.54 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.41 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.06 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.19 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.81 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.22 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.57 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.83 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GTF2A2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.91 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.37 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.06 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.10 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.32 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 26.12 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.32 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.34 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 25.63 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.04 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.00 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.52 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SNIP1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.25 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:18, 23.19 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.05 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 21.98 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 22.52 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 22.10 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.65 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.96 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.73 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.11 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.14 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.83 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPS27.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.37 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:16, 26.10 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:14, 28.26 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 29.34 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 29.29 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 29.53 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 29.68 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 29.79 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 29.97 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 29.99 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 29.83 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:12<00:04, 29.74 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/POLD2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.53 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.06 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.28 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.10 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 22.70 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.87 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.57 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.57 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.57 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.19 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 25.79 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.90 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DDX10.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.05 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.60 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.85 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 24.83 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.40 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.72 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.58 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.21 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.48 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.69 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.97 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 30.17 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RAB4A.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 16.80 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.08 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.08 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.47 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.73 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.40 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.87 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.42 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.74 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.88 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 30.03 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 30.15 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TOMM22.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.85 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.34 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.49 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.46 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.10 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.25 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.32 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.23 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.92 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 20.68 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 20.83 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.20 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ANAPC13.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.81 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.42 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.19 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.22 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.96 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.26 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 25.32 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.87 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.04 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.36 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.33 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 25.85 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DBR1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.88 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 20.87 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.15 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.36 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.67 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.62 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.27 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.22 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.07 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.19 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.29 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 24.98 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DHDDS.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.77 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.24 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.84 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.09 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.81 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.07 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 23.34 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 24.15 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.77 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 26.93 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.79 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.55 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ADAT2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.84 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.44 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.16 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.21 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.09 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.33 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.91 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.60 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.64 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.31 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.25 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 25.83 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ATF5.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.89 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.20 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.81 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.16 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.03 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.00 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.76 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.95 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.59 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 26.83 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.73 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.51 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RCOR1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.92 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 20.77 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.52 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.29 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.48 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.46 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.60 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.24 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.43 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.42 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.03 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.49 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CHTF18.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.50 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 19.99 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.25 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.14 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.84 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.78 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.64 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.13 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 21.91 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 23.89 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 25.52 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 25.55 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PFN1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.71 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.23 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.81 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.71 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.99 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.74 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.79 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.02 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.48 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 23.37 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.84 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.25 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SPATA5L1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.50 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:16, 27.06 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:14, 27.87 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 28.92 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 29.09 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 29.28 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 29.54 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 29.62 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:08, 26.69 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:07, 24.70 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 23.27 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:05, 22.84 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/WDR36.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.80 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.22 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.83 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.78 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.58 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.80 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.63 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.08 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.49 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 28.29 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 28.81 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.36 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/VPS72.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.73 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.40 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.47 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.46 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.25 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.53 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.39 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.73 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 24.14 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 23.72 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 25.46 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 24.58 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GINS2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 17.03 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.06 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.29 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.44 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.36 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 26.73 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.69 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.39 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.81 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 25.92 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.12 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.91 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NAA10.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.95 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 21.40 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.82 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:17, 21.70 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.41 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.22 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.36 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 24.83 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.69 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.65 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.32 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 24.22 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TRAPPC1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.72 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.56 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.74 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.24 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 29.98 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 29.92 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 30.08 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 29.99 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 30.10 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 30.10 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 29.99 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:12<00:04, 30.04 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NCAPD3.dataset


  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.02 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 20.78 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.57 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.90 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 28.03 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.69 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 29.29 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:09, 26.17 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 24.33 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 23.60 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 25.34 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 26.74 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/EIF1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.80 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.36 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.50 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.41 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.08 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.52 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.44 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.34 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.14 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.66 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.93 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 24.92 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CHORDC1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.71 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.45 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.74 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.07 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.85 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.42 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.30 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.50 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.26 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 26.51 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.55 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.32 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DRG1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.88 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 21.12 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.88 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.92 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 28.05 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.71 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 29.24 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:09, 26.31 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 24.49 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 23.30 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.74 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 24.58 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/EEF1A1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.05 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.10 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.96 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.33 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.44 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.85 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.91 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.49 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.64 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.52 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.03 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.45 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SFPQ.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.78 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.31 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.06 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.09 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.95 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.19 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.01 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.54 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 23.96 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.92 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.45 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.47 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPP14.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.91 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.26 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.05 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.37 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.55 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 23.95 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.70 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 27.03 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.99 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.75 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.29 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.68 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SF3A3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.66 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.36 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.28 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 23.18 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.05 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.53 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.55 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.34 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 25.60 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 23.82 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.18 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 24.86 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DDX19A.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.97 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.47 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.11 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.22 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.07 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.39 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.27 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.87 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.39 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.51 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.13 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 25.66 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PSMG2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.49 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.83 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.77 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.60 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.47 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.31 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 30.28 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 30.26 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 30.31 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 30.31 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 30.31 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:12<00:04, 30.33 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SRSF6.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.93 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.24 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.90 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.86 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.81 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.29 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 23.49 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 25.34 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 24.05 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 23.03 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.35 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.13 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/COPG1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.78 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.06 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.75 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.26 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:13, 24.49 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.94 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.42 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.57 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.24 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.83 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 26.13 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 24.52 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SF3B3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.86 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.43 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.74 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.67 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.41 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.60 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.40 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.07 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.45 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.84 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.83 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.95 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SMC5.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.67 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.45 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.18 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.56 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.71 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.42 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 29.01 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:09, 25.68 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 24.06 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:08, 23.02 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.72 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 24.51 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GEMIN5.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.91 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.10 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 19.56 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.71 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.22 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.15 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.55 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.62 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.36 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.80 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.24 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.43 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NPM3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.54 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 19.96 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.53 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.89 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.87 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.93 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.94 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 20.84 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.39 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 22.46 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 24.36 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 25.83 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/VHL.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.70 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.48 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.06 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.17 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:13, 24.50 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 26.22 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.41 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.24 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.92 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.36 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 26.61 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 24.57 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/LRR1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.86 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.41 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.02 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.20 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.38 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.77 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.47 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 27.01 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.94 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.60 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.25 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.52 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SNAPC3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.63 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 19.90 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.58 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.31 examples/s][A
Map (num_proc=16):  88%|████████▊ | 438/500 [00:18<00:02, 25.83 examples/s][A
Map (num_proc=16):  94%|█████████▍| 469/500 [00:19<00:01, 23.94 examples/s][A
Map (num_proc=16): 100%|██████████| 500/500 [00:21<00:00, 23.48 examples/s][A

Saving the dataset (0/1 shards):   0%|          | 0/500 [00:00<?, ? examples/s][A
Saving the dataset (1/1 shards): 100%|██████████| 500/500 [00:00<00:00, 52685.64 examples/s][A
  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miRNA_loc]


Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PSMG3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.90 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.70 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.61 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.77 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 28.02 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.73 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 29.21 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.56 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.80 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 30.02 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 30.21 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 26.80 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SRP54.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.94 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:18, 23.47 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:15, 26.30 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:13, 27.83 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:12, 28.24 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.67 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.67 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.43 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.81 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.40 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 26.25 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:05, 24.17 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SCFD1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.75 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.10 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.48 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.38 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.91 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.17 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.03 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.62 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 23.86 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 23.07 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.93 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.23 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MRPS5.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.99 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.40 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.98 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.68 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.07 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.70 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 22.17 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.75 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.48 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.33 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.22 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.20 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GAB2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.46 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.06 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.63 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:16, 21.91 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.20 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.06 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.47 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.58 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.33 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.92 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.08 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.41 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ABHD17A.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.95 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.66 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.42 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 23.16 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 22.34 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.51 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.38 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.91 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.19 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.36 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.88 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 25.76 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TGS1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.80 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.66 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:15, 25.45 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.70 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:15, 22.71 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.28 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 21.70 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.53 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 21.96 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 23.98 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 25.52 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.86 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/IK.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.97 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.03 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.44 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 20.50 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.90 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.10 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.09 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.49 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 23.54 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 22.73 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 22.08 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.69 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RINT1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.70 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.12 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.26 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.30 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.25 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.39 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.23 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.87 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.36 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.38 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.27 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.77 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/OSTC.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.37 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.34 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.30 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.36 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.22 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.53 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 25.11 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.43 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.03 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 21.88 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.52 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.49 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/POP5.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.77 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.27 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.63 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.55 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.26 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.51 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.34 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.97 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.43 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.67 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.84 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.85 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/POLD1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.94 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.40 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.20 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.26 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.01 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.12 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 23.00 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.25 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:10, 21.58 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.34 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.21 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.09 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CACTIN.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.33 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.21 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.79 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.07 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.27 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.31 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.23 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.06 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.23 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 23.35 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 25.05 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 26.28 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/HAUS5.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.81 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.65 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.66 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.20 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.20 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.15 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 30.18 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 30.11 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 30.10 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 30.07 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 26.84 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 25.01 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPS25.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.75 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.24 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.16 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.25 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.01 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.29 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 21.97 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.57 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:10, 21.30 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.14 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 20.99 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 20.94 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DDX59.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.75 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.25 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.08 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.09 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.47 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.27 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 23.38 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 25.00 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.33 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.53 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.10 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.26 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RNGTT.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.41 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.79 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.62 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.68 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.58 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.74 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.69 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.28 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.41 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 21.72 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.38 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.09 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/USF2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.68 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 20.88 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 23.19 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.49 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.76 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.18 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.20 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.77 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 26.92 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 27.83 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.44 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 25.60 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/EXOSC5.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.89 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.41 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.88 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.73 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.08 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 25.85 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.10 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.11 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.16 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 21.92 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.68 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.38 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TCERG1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.69 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.11 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.79 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.41 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.71 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.47 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.91 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.84 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.44 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.92 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.13 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.45 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/EIF2B1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 16.91 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.52 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 23.57 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.05 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.16 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 28.16 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.86 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.35 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.60 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.81 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.93 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 30.12 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/EIF5A.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.87 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 21.53 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 25.04 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 27.06 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 28.12 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.75 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.70 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:09, 25.11 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 23.58 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:08, 22.60 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.37 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 24.17 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MANF.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.87 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.56 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.88 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.09 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.29 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.50 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.40 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.38 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.54 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 25.21 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:05, 26.58 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 24.83 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/EBNA1BP2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.15 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.50 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.95 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.29 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.49 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.85 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.56 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.88 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.94 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.63 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.98 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.32 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CDK9.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.68 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.11 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.75 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.45 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.34 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.61 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.53 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.15 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.85 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 25.80 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 24.01 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.55 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPF2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.67 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.19 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.19 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.14 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.94 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.12 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 27.99 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.44 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 23.80 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.63 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:07, 21.30 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.36 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/EGLN2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.56 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.18 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.34 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.09 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:14, 23.62 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.66 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 21.95 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.61 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:10, 21.41 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.30 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.42 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 25.06 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/C17orf58.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 16.99 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.48 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.53 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.47 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.21 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.36 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.38 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.94 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.37 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.63 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.88 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.94 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPS10.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.80 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.39 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.99 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.33 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.98 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.69 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.99 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.72 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.83 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 25.18 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.88 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.02 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ABT1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.88 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.41 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.03 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.10 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.78 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.02 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.02 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.68 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.20 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.33 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.80 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.64 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CHMP7.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.65 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.53 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.49 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.07 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:12, 26.39 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 24.43 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:12, 23.08 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:11, 22.34 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 22.35 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 24.31 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 25.80 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 27.03 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NAPG.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.85 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:17, 24.41 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:14, 27.07 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 28.75 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 29.06 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 25.85 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.56 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:11, 21.99 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:10, 21.66 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 21.40 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.10 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.08 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PSAT1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.54 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.13 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.82 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.41 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.92 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.72 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.97 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 24.96 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.65 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.97 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.79 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.27 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CDC37.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.90 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.42 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.80 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.15 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.04 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.20 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 23.50 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 25.36 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.55 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.68 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 25.37 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.91 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MIPEP.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.72 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.33 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.19 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.66 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.38 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.24 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.26 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:12, 20.49 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.67 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.60 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.69 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:04, 25.25 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPL36A.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.59 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.29 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.39 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.14 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.88 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.18 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.13 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.68 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.18 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.41 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 26.49 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:05, 24.29 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPL34.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.97 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.41 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.18 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.17 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.15 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.35 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.15 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.83 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.44 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.72 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.94 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.75 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RBBP6.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.95 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.54 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.36 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.66 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.31 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.42 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.14 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.62 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.16 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.49 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.55 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.69 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MED22.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.71 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.54 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.74 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.19 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.40 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.28 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 30.19 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 30.31 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 30.29 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 30.27 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 30.29 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:12<00:04, 30.29 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/FASTKD5.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:26, 17.72 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.33 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 19.95 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 20.17 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.28 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:15, 20.56 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 23.04 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 24.89 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.26 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.38 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 24.29 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.71 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SART3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:26, 17.64 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.18 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 20.13 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 20.14 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.15 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.49 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.42 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.79 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.89 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 22.31 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 24.25 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 25.86 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/BIRC5.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.77 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.24 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.93 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.50 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.92 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.79 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.10 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.10 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.87 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.27 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.23 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 25.72 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ZNF335.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.70 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.69 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.60 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.10 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:13, 26.17 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 23.86 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:12, 22.93 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:09, 24.81 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:08, 26.28 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 27.44 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 28.26 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 28.77 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/INTS8.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:17, 26.21 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.97 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.84 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.46 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.09 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.26 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 30.10 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 29.99 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 30.28 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 30.45 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 27.17 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 25.11 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/STARD7.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.45 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.48 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.65 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.48 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 28.81 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 29.40 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 29.61 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 29.84 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 30.07 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 30.22 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 30.23 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:12<00:04, 30.13 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MDN1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.88 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.50 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.22 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.17 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.68 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.81 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.61 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.24 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.69 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.87 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 30.15 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 28.90 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SRP72.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.03 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.68 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.54 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.85 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 28.08 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.87 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 29.40 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.81 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 30.17 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 30.22 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 26.60 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:05, 24.76 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/EIF3H.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.60 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:23, 18.74 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.33 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.51 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 22.76 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 22.05 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.94 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.40 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.38 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 23.43 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 25.00 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 26.27 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/COPS8.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.91 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.56 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.27 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.34 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.19 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.47 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.16 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.13 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.04 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 26.52 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.55 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.27 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CHMP5.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.67 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.24 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.23 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.10 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.58 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.73 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.64 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.12 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.51 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 26.06 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 24.30 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:05, 23.99 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/XRCC6.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.72 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.32 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.95 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:16, 21.97 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:13, 24.36 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 25.92 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.20 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.12 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.76 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.28 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.71 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.80 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RTF1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.50 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 19.83 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.56 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.71 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:08<00:17, 19.79 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.98 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 23.36 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 25.23 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.60 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.84 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.53 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.77 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DUT.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.88 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.45 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.04 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.22 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.09 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.43 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 22.20 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 24.38 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.88 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.02 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.93 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.33 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NAA25.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.73 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.81 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.76 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.15 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.57 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.29 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.80 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 24.67 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.47 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.63 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.50 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 25.91 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/EDC4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 16.90 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.48 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.42 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.83 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.91 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.85 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 24.07 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.24 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 22.50 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 22.34 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 24.26 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 25.82 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/HLA-C.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.62 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.35 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.82 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.08 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.00 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.29 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.13 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.75 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.89 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 25.81 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 23.92 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 22.98 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CWF19L2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.65 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.43 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.89 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.05 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 22.18 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.05 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.93 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 27.17 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.05 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.73 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.13 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.52 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/BTF3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.67 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:18, 23.31 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 22.45 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.42 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 24.57 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.17 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.42 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.30 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.84 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.19 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.55 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.75 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/FAM136A.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.74 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.39 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.85 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.75 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 22.18 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.53 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.49 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 24.56 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.17 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.33 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 28.17 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.89 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RHOQ.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.94 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.59 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.18 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.29 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 24.53 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.25 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.37 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.15 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.87 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.35 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.65 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.78 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/HUS1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.92 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.46 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.04 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:16, 21.97 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:13, 24.37 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 25.95 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.98 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 22.99 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.38 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.13 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.47 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.43 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ATP1A1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.70 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.30 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.01 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:16, 22.12 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.98 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.77 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.63 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 20.77 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.02 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.27 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 21.28 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.63 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/URM1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.04 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.49 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.89 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 24.96 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.61 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 24.65 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.40 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 22.68 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 24.77 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 26.30 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 27.35 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.16 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/BORA.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.66 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.42 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.66 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.05 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:12, 26.42 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 24.42 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:12, 23.09 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:11, 22.39 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 22.70 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 24.56 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.57 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 22.65 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PAXBP1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.93 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 21.13 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.78 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.65 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.18 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:15, 20.22 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.85 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.18 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.16 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.08 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 21.45 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 23.56 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/EFTUD2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.80 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.38 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.18 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.38 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.66 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.44 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.86 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.41 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.46 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.58 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.97 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.99 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/UTP6.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.83 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.36 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.93 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.16 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.91 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.88 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.90 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 20.93 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.74 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:15<00:08, 20.99 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:06, 23.16 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:04, 25.01 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NDUFA6.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.07 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.27 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.89 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 20.25 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.48 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:15, 20.40 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.89 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.34 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.22 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 26.52 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.63 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 25.18 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SMAGP.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.54 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.16 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.94 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.94 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.02 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.38 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 25.36 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.68 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.87 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 23.21 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.55 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.10 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TAF5.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.05 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.51 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.72 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.48 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.93 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.80 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.15 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.18 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.90 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.39 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.75 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 26.53 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NOL12.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 17.14 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 20.77 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.60 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.87 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 28.10 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.98 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 26.12 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 24.27 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 23.59 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 25.49 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 26.53 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 27.71 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SNRPG.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.71 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.31 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.85 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.56 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.99 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.76 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.86 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.75 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.26 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.94 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.13 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.99 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NELFB.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.07 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 20.81 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.44 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.74 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.75 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.57 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 29.23 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.65 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.80 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.84 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 30.18 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 30.27 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GRSF1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:20, 23.05 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:16, 26.99 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:14, 28.82 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 29.89 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 29.88 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 29.99 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 30.14 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 30.15 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 30.19 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 29.98 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:06, 25.66 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:05, 24.38 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MVD.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.92 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.51 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.12 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.37 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.08 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.15 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.12 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.08 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.77 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.03 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 21.02 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 23.26 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/HAUS6.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.92 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.62 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.34 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.68 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 24.79 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.36 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.19 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.76 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.48 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.24 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.90 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.50 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SASS6.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.79 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.33 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.23 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.11 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.66 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.70 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.47 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.78 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 24.02 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 23.47 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 25.14 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 26.51 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GTF2F1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.87 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.46 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.66 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:16, 21.89 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.39 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.38 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.08 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 22.94 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.75 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 26.37 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.51 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.36 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RFC2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.49 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:18, 23.55 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 22.68 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.18 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.98 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.23 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.82 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 27.01 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 24.99 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 23.24 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.53 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 24.43 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/POLE2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.53 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.00 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.91 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.87 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.60 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.82 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.88 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.41 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.90 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 27.06 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 24.76 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.28 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPP38.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.95 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 21.02 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.66 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.97 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:14, 23.09 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.85 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.43 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.00 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:10, 21.65 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.35 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.27 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.56 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPAIN.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.03 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:18, 23.12 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.24 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.12 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:14, 24.26 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 25.89 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.17 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 27.94 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.54 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 28.92 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.25 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.08 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/FNBP4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.85 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.58 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.61 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.64 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.34 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.19 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.29 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.32 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 26.50 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 27.56 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.41 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.97 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TBP.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 16.97 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.47 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.85 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.05 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.90 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.30 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.22 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.80 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.41 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.57 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 26.74 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 24.88 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/LARS.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.84 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.37 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.13 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 24.95 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.51 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.68 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.51 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.08 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.00 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 28.33 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 28.87 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.25 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/N6AMT1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.32 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:18, 23.00 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.31 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.35 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 24.47 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.23 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.37 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.20 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.86 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.08 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.51 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.76 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ATL2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.11 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.53 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.17 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.44 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.20 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.23 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.65 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.85 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.60 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 26.97 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.91 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.69 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GFM1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.11 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.56 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.81 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.66 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.42 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.29 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 23.08 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.37 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:10, 21.63 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.49 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.65 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.72 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NFYB.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.78 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.12 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.87 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.59 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.84 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.71 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.09 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.84 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.74 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.19 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.61 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 26.58 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SF3A2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.88 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.45 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.99 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 24.91 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.58 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.69 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.49 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.07 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.40 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.64 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.89 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 30.00 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SACM1L.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.75 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.90 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 30.00 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.53 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 29.83 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 29.15 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 29.55 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 29.86 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 30.02 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 30.17 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 30.20 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 26.90 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/KRT8.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.02 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.34 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.71 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 26.32 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:05, 24.45 examples/s][A
Map (num_proc=16):  81%|████████▏ | 407/500 [00:16<00:03, 23.49 examples/s][A
Map (num_proc=16):  88%|████████▊ | 438/500 [00:17<00:02, 25.25 examples/s][A
Map (num_proc=16):  94%|█████████▍| 469/500 [00:18<00:01, 26.59 examples/s][A
Map (num_proc=16): 100%|██████████| 500/500 [00:19<00:00, 25.93 examples/s][A

Saving the dataset (0/1 shards):   0%|          | 0/500 [00:00<?, ? examples/s][A
Saving the dataset (1/1 shards): 100%|██████████| 500/500 [00:00<00:00, 53908.59 examples/s][A
  for i in adata.var["ensembl_id"][codin

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CSTF3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 16.93 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.53 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.27 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.32 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 22.95 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.28 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 22.34 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 24.47 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.00 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.26 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 28.11 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.75 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/COPS6.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.76 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.51 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.10 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.39 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.80 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.54 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.92 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.78 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.65 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.31 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.83 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 26.12 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/KIF18B.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.71 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.65 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.88 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.31 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:12, 26.35 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 24.17 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:12, 23.25 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:09, 25.08 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:08, 26.56 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 27.53 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 28.41 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 28.64 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/EIF1AD.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:17, 26.41 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 29.05 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 30.11 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.32 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.39 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.39 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 30.50 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 30.42 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 30.62 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 30.47 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 30.36 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:12<00:04, 30.53 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/EXOSC4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.67 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.68 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.48 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.70 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.97 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.22 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:11, 25.33 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.99 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 22.66 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.23 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:07, 21.90 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.55 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CCT7.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.98 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.43 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.97 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.36 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 22.41 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.64 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.25 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 27.50 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.12 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.83 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 26.11 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 24.07 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/EEF2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.66 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.47 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.56 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.24 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.22 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.17 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 30.15 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 30.19 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 30.12 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 26.88 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:06, 24.66 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:05, 23.55 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/EIF4B.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.97 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.46 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 20.20 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 20.42 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.63 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.75 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.69 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.79 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 22.97 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 22.37 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 22.09 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.94 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PIK3C3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.97 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.49 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.22 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.53 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.69 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.34 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:11, 25.01 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.60 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 25.07 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 26.38 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 26.89 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 27.07 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RIOK2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.82 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.68 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.39 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.71 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 22.79 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 22.03 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.56 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.43 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 21.77 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.61 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.35 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.56 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/STRIP1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.03 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.39 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.07 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:16, 21.89 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.29 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 26.09 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.34 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.28 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.86 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.45 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.70 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.84 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PSMD7.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.55 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.27 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.67 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.21 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.34 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.34 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 28.70 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 29.15 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 29.56 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 29.79 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 30.17 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:12<00:04, 30.23 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/USP7.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.75 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.57 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.38 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 23.15 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 22.12 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 22.14 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.66 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.45 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:10, 21.67 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 23.82 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 25.42 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 26.67 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SUGT1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.91 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.42 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.87 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.07 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.19 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.59 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 23.00 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.47 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.03 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.91 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.34 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.38 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/INO80.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.98 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.50 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.12 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.59 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.70 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.63 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.12 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.08 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.80 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.25 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.73 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 26.71 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/LYRM4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.66 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.19 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.40 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.04 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.52 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.49 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.21 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.78 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.11 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.27 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.41 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.15 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPTOR.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.90 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.27 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.85 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:16, 21.97 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.18 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 25.85 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.18 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.05 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.74 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.13 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.52 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.75 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GPKOW.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.07 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:23, 18.87 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.74 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.93 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.87 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.24 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 25.19 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.85 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.84 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.31 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.17 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 24.22 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ISG20L2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.00 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.60 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.48 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.52 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.09 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.46 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 26.09 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.23 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.89 examples/s][A
Map (num_proc=16):  81%|████████▏ | 407/500 [00:16<00:03, 25.50 examples/s][A
Map (num_proc=16):  88%|████████▊ | 438/500 [00:17<00:02, 26.77 examples/s][A
Map (num_proc=16):  94%|█████████▍| 469/500 [00:18<00:01, 26.46 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/HSF1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.98 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.27 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 23.36 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.73 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 22.15 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.88 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.04 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.58 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 26.87 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 27.74 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.41 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.86 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SRF.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.93 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.02 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.73 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.98 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.06 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:15, 20.50 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 22.93 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 24.78 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.27 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.83 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.43 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.20 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/HMGB1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.97 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.05 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.86 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.39 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.64 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.60 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.88 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.76 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.44 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.90 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.15 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.25 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DDX11.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.54 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 20.83 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.50 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.43 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.51 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 25.09 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.55 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 22.67 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 21.96 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.45 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.19 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.30 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/EXOSC1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.67 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.63 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.20 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.80 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.32 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.46 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.09 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.76 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.13 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 26.25 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 24.47 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.16 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PDCD7.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.75 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.41 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.76 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.64 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.52 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.73 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.05 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.80 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.84 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.48 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.75 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 24.03 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PRPF6.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.57 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.17 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 23.86 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.08 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:14, 24.27 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.03 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.09 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.55 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:10, 21.33 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.50 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.54 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 25.01 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DPH6.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.90 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:23, 18.85 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.58 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.83 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.67 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.99 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.12 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.76 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.26 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.52 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.76 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.89 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/KCTD10.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.78 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.37 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.93 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.21 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.13 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.03 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.96 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.02 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 23.23 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 24.00 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 25.69 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 26.99 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DDX42.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.94 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.31 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.67 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.06 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.77 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.27 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 22.39 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 24.41 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.90 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.01 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.92 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.59 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NUP98.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 16.99 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.04 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.00 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.37 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.60 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.40 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.79 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.03 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.52 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 26.22 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 24.48 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:05, 23.65 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NMD3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.20 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.43 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.49 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.12 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.64 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 23.97 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.78 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.88 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 27.10 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.27 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.84 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.88 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TOP3A.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.86 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.06 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.72 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.98 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.79 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:15, 20.57 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.94 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.17 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.94 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 26.20 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.31 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.07 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NAA50.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.59 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.27 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.10 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.07 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 22.56 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 22.08 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.92 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 24.93 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 26.28 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.36 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 28.18 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.81 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TMSB10.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.70 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.25 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.69 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.97 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.24 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.81 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.67 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.18 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.19 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 28.07 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 28.50 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.88 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SMC6.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.35 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.12 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.04 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 29.94 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 29.78 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 29.91 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 30.01 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 30.08 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 30.17 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 26.80 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:06, 24.44 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:05, 23.22 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPF1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.96 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.35 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.63 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.66 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.41 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.64 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.43 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.99 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.42 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.68 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.86 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.97 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TRAPPC11.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.63 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.32 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.20 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.80 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.66 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.17 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.08 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.72 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.26 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.50 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.78 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 30.04 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TRMT10C.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.65 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.22 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.52 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.12 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:12, 26.48 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 24.50 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:12, 23.24 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:11, 22.28 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 22.54 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 24.46 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.72 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 22.21 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/BNIP1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.71 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.34 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.72 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.63 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.38 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.60 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.47 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.83 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.40 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 26.04 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 24.05 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.15 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CEP68.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.82 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.10 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.37 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.91 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.79 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.02 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.66 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.66 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.59 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 26.85 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 24.57 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.35 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SSRP1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.80 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.04 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.63 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.04 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.46 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.30 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.24 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.41 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.18 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.66 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.91 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.74 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/LRPPRC.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.06 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 20.78 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.72 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.96 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 28.04 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.73 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 29.32 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.51 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:08, 26.42 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 24.49 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 23.59 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 25.25 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CD3EAP.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.81 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.45 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.33 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.13 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.67 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.92 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.71 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.36 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.61 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.73 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 26.66 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 24.87 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/AP2M1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.95 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.21 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.20 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.32 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.20 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.41 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.04 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 27.86 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.51 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.01 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.30 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 26.18 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPS6.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.95 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.45 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.49 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.65 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.84 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.69 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 29.18 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.57 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.69 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:07, 26.46 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 24.72 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:05, 23.36 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SEC61B.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.61 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.22 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.86 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:16, 21.98 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.33 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 26.04 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.22 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.13 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.72 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.07 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.48 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.40 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GTF2A1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.96 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.39 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.30 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.44 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.17 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.24 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.32 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 22.92 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.75 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.60 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 25.91 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.97 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/HMGCS1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.07 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 20.92 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.66 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.27 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 22.48 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 22.09 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.73 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.28 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:10, 21.27 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.37 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.38 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 25.21 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/IARS.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.94 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.55 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.23 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.31 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.21 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.51 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 25.30 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.70 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.87 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.86 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.72 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.33 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RANGAP1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.47 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.33 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:14, 27.54 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 28.65 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 29.11 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 29.31 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 29.56 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 29.64 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 29.71 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 29.87 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 29.85 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:12<00:04, 29.80 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/POLA2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.84 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.32 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.03 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.34 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.55 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 23.86 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.80 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 24.29 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.08 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.53 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.32 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.10 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MRPS21.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.97 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.44 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.84 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.72 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.31 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.42 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:11, 23.44 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 25.39 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.76 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.71 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 28.07 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.87 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PGK1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 17.03 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.56 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.47 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.93 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.49 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.77 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.58 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 24.10 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.20 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.04 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.43 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.52 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ELP3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.32 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.30 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.09 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.72 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.12 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 25.96 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.28 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.25 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.43 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 25.51 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.97 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 22.86 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/BRF2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.78 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.25 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.89 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:16, 21.89 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.20 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 25.84 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.10 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.04 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.66 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.10 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.30 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.61 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ALYREF.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 17.02 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.55 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 23.69 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.13 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.39 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.29 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.59 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.17 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.59 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.82 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.96 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 30.11 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/BUD13.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.60 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.19 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.73 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.44 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.30 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.36 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.24 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.15 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 22.32 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.95 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.31 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 24.27 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DNMT1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.74 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.35 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.04 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:16, 22.28 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 24.58 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 26.15 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.48 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.30 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 23.82 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 23.08 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.96 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.30 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/IFITM2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.66 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.22 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.97 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.77 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.39 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.43 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.28 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.74 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.20 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.41 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.53 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.76 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/THOC6.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.94 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 20.99 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.66 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.95 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 28.02 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.63 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.75 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.60 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.19 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.54 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.72 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 30.06 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DYNLL1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.60 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.63 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.37 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.24 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.28 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.34 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:10, 26.79 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:10, 24.65 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:09, 23.40 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:08, 23.03 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 24.89 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 26.38 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PSMB3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.87 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.29 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.90 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.17 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.26 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.62 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.38 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.70 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.73 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.45 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.97 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.28 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SNRNP25.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.00 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.32 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.38 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.34 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.22 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.86 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.14 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.83 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:10, 21.57 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.72 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.86 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 25.49 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PIGH.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.81 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.04 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.71 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.68 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.08 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.65 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.94 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.84 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.46 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.97 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.12 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 25.62 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/UBE2I.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.58 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 19.84 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 23.60 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.87 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.15 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.96 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.42 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.89 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.11 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.17 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.28 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.34 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPL23A.dataset
Creating dataset.



Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 22.76 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 22.49 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 24.36 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 24.88 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 26.27 examples/s][A
Map (num_proc=16):  81%|████████▏ | 407/500 [00:15<00:03, 27.37 examples/s][A
Map (num_proc=16):  88%|████████▊ | 438/500 [00:16<00:02, 28.06 examples/s][A
Map (num_proc=16):  94%|█████████▍| 469/500 [00:17<00:01, 28.88 examples/s][A
Map (num_proc=16): 100%|██████████| 500/500 [00:18<00:00, 26.37 examples/s][A

Saving the dataset (0/1 shards):   0%|          | 0/500 [00:00<?, ? examples/s][A
Saving the dataset (1/1 shards): 100%|██████████| 500/500 [00:00<00:00, 40602.35 examples/s][A
  for i in adata.var["ensembl_id"][coding_miRNA_loc]
  coding_miRNA_ids = adata.var["ensembl_id"][coding_miR

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MRPS9.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.67 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.30 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.05 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.07 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.99 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.10 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 23.00 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.36 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.09 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.09 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 25.64 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.83 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SAMM50.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.74 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.24 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 19.98 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 23.19 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.23 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.72 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.00 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.54 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.02 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.28 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 26.28 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:05, 24.59 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SRCAP.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.77 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.51 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.36 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.64 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.68 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 25.03 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.37 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 22.96 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 23.60 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 25.25 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.03 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 22.96 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/USP10.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.71 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.75 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.83 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:14, 26.00 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:14, 24.26 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.24 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.29 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.33 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.99 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.54 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.74 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 30.06 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NDC80.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.67 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.05 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.23 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 29.91 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:13, 25.90 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:13, 23.77 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:12, 23.03 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:09, 24.87 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:08, 26.29 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 27.30 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 28.02 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.44 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NMT1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.98 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.51 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.97 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.30 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.16 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.15 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.51 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 22.81 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.73 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 26.18 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.27 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.15 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PSMC6.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.73 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.32 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.29 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.33 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.87 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.09 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.01 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.64 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.09 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.41 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.62 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.77 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TUBB.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.85 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.54 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.09 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.06 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.74 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.80 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.45 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.10 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.45 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.85 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 26.92 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 24.89 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ZBTB11.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.97 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.59 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 19.86 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.31 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.84 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.06 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 22.42 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.06 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.89 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 26.33 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.49 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.21 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PCID2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.03 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.40 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.08 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.61 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.06 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 25.87 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.17 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.12 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.79 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 27.95 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.61 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.12 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/EXOC5.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.83 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.29 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.82 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.75 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.62 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.84 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.25 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.03 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 21.76 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.44 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.28 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.14 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SMG1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.73 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.70 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.52 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.47 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.81 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.56 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 29.03 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.29 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.56 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.81 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.90 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.93 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/BCL2L1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.70 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.22 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.81 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.43 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.70 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.45 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.94 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 27.26 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 25.02 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 23.60 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.73 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 24.53 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/KANSL3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.05 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.41 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.62 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.54 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.49 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.60 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.60 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.06 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.30 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.69 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.88 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.97 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ELAC2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.73 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.18 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.66 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.68 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.43 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.72 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.68 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.13 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 23.46 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.70 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:07, 22.14 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.78 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CENPA.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.69 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.24 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.83 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.98 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.84 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:15, 20.55 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 22.92 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 24.81 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 26.34 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.45 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 28.26 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.88 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PPP4C.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.71 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.35 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.92 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.12 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.10 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.97 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.14 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.39 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.17 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 23.92 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.03 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.00 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GLRX3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.00 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.62 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.37 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.67 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.73 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.59 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 29.04 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.49 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.75 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.92 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 30.08 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 26.86 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ING3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.69 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:18, 23.16 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 22.51 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:17, 20.81 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.89 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.10 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.84 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 27.13 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.12 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.72 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.21 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.52 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/WDR18.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.62 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.28 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.12 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.32 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.30 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 28.10 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:11, 25.10 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.55 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 25.37 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 26.84 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 24.68 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 22.40 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RAD51.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.69 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.38 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 30.04 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.40 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.42 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.54 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 30.38 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:09, 26.39 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:08, 24.53 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:07, 23.44 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 22.64 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:05, 22.19 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/XRCC5.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 16.99 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.52 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.50 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.95 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.55 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.40 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.59 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.67 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.37 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.89 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.38 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.58 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/UTP20.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.79 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.27 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.90 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:16, 22.06 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.63 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.67 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.91 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:09, 25.38 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.27 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 22.17 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.80 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.83 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/XPO5.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.95 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.32 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.57 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.83 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.70 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.70 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.67 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 20.74 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.74 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:15<00:08, 20.82 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 21.32 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 23.44 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RNASEH2C.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.92 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.35 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 23.95 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.32 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.55 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.42 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 29.00 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:09, 26.12 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 24.26 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:08, 23.09 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.38 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 22.62 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SDHAF2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.97 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.50 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.05 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.09 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.11 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:15, 20.54 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.42 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.62 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.24 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 26.61 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 24.98 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.67 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ISCA2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.89 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.13 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.61 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.94 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.29 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.44 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 25.29 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.54 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.64 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 23.14 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.37 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.65 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/THAP11.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.32 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.71 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.84 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.49 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.48 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.36 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 30.25 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:09, 26.58 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:08, 24.55 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:07, 24.23 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:06, 25.81 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 26.97 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MED17.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.07 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.49 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 23.78 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.53 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 22.00 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.66 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 22.09 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.99 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 21.70 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.56 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.44 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 22.14 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/POLR2L.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.66 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.27 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.56 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.38 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.22 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.37 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 30.19 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 30.25 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 30.28 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 26.69 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:06, 23.57 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:05, 23.12 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/HGS.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.85 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.55 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.75 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.37 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.27 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.26 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 30.09 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:09, 26.32 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:08, 24.77 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:07, 26.24 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 27.25 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 28.14 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TAF1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.52 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:23, 18.90 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 22.92 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.40 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.72 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.54 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.21 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.65 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.86 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 25.90 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 24.12 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.27 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/HAUS8.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.94 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.51 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.83 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.78 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.42 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.66 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.33 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.07 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.55 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 28.24 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 28.93 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 26.08 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/BDP1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.50 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.14 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.90 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 24.80 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.38 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.53 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.24 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.87 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 24.02 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 23.33 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 24.92 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.30 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SAP130.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.69 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.74 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.42 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.58 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 22.58 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.80 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.52 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.10 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.22 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.74 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.89 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.90 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DCTN2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.88 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.67 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.21 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.64 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.91 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.52 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.62 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.00 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.58 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.01 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.51 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.60 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/VBP1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.86 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.18 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.67 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.03 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.83 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.89 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.41 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.56 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.35 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 23.71 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.86 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.01 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ANKRD11.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.01 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.52 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.86 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.24 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.15 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 22.03 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.20 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.85 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.14 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 25.34 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.92 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.49 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NHP2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.85 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.42 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.95 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.11 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:13, 24.44 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 26.19 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.34 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 22.78 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.59 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 26.13 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.15 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.98 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/UBE2Z.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.86 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.66 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.35 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.62 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.77 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.59 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 29.12 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.47 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.73 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 26.63 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 24.81 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:05, 24.08 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/FTSJ3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.66 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.11 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.76 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.23 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.27 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.21 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.96 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 20.79 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.82 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 22.81 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 24.71 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 26.11 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RRP12.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.63 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.15 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.02 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.02 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 22.64 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 20.88 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.72 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 20.77 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.75 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.14 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.29 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 24.98 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CHCHD4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.61 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.31 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.41 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.66 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.34 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.93 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.06 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.23 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 24.95 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 26.12 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.37 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.05 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GUCD1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.89 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.26 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.95 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.94 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.78 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.09 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.01 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.65 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.15 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 26.09 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 24.22 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.05 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPL35.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.00 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.56 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 23.55 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.87 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.23 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 28.10 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.85 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.23 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.51 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:07, 26.42 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 24.47 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:05, 23.35 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/HIRA.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.67 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 21.16 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.77 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.89 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 28.03 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:12, 25.11 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.30 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:11, 21.65 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:10, 21.01 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 21.07 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.42 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.52 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/LSM5.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.74 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.25 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.62 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.46 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.11 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.29 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.03 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.61 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 28.83 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.09 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.40 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.59 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/POLR3E.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.91 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.35 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.18 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.50 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.60 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.39 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 25.68 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 24.09 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 23.12 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 25.04 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 26.34 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 27.37 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MFAP1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.89 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.15 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.80 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.50 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.97 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.61 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.35 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 27.48 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.26 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.86 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.19 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.46 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DCTN6.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.93 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.35 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.49 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.46 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.18 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.25 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.11 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.50 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 23.91 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 23.05 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.36 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 24.24 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CBLL1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.65 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.28 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.66 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.96 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.43 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.19 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.53 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 27.26 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 25.06 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 23.54 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.74 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 24.49 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GEMIN6.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.87 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.27 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.35 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 23.12 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.23 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.64 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.53 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.30 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.72 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.13 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.36 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.42 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPL19.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.86 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.44 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.26 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 23.00 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 22.09 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.76 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.89 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.60 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 26.83 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 27.76 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.54 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 26.02 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ZCRB1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.98 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.27 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.37 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.63 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.34 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.60 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.65 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.12 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.81 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.58 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 26.05 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 27.15 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/UQCC2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.77 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:20, 21.23 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.90 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 27.04 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 24.60 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.19 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 23.17 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.05 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 26.43 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 27.39 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.09 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.79 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PET117.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.69 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.12 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.65 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.99 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.77 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.90 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.56 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.29 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.82 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:15<00:08, 20.83 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 21.27 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 23.30 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/AKIRIN2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.94 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.47 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.25 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.68 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.55 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.60 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.64 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.37 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 25.77 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 24.23 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.11 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 25.02 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CRNKL1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.81 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 19.89 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.67 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.74 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.81 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.74 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.86 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.91 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 23.92 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 25.45 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 25.52 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 26.66 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/POT1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.58 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.31 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.89 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.08 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.75 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.00 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.67 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 24.73 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 23.49 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.68 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.26 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 25.03 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/LENG8.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.99 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.01 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.96 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.82 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.77 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.31 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.85 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.36 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:10, 21.61 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.41 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.46 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 25.16 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PEF1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.86 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.34 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.78 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.72 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.58 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.75 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.19 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.70 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:10, 21.44 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 20.79 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.92 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 24.93 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PABPN1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:23, 19.91 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:17, 25.16 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:14, 27.45 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 28.77 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 28.95 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 29.17 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 29.42 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:09, 26.09 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:08, 24.15 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:08, 22.86 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:07, 22.05 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 22.47 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPL10.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 16.89 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.39 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 20.10 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:18, 20.66 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.47 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.64 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.37 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.75 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.59 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.32 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.61 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 29.06 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CDT1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.90 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.36 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.94 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.46 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.92 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.72 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.09 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.73 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.77 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 25.18 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.88 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 25.45 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GINS1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.80 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.33 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.98 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.00 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.92 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.86 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.95 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 20.81 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.89 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 22.08 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 23.98 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 25.66 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ACTR8.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.68 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.82 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.88 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.03 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.05 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.11 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:10, 26.69 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:10, 24.61 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:09, 23.28 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:08, 22.49 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 22.21 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:05, 23.94 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RSL1D1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.81 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.46 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.32 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.63 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 24.75 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.53 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 23.16 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 24.00 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 25.77 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 26.79 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 27.75 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.58 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPL13.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.02 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.62 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.45 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:13, 26.72 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 24.41 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.05 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.82 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 22.34 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 21.95 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.01 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 23.95 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 25.64 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CLK2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.86 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.42 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.17 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.43 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.72 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.36 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.91 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.26 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:08, 26.45 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 24.44 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 23.32 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:05, 23.04 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/AFG3L2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:26, 17.90 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:19, 22.92 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:15, 26.15 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:13, 28.02 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.69 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.75 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.53 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.12 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.53 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.71 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.88 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.96 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/FBL.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 17.01 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:23, 18.42 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:05<00:21, 19.07 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:19, 19.30 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:08<00:17, 19.22 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:15, 19.44 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:11<00:14, 19.32 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:13<00:12, 19.17 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:14<00:11, 19.60 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:16<00:09, 19.86 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:17<00:07, 20.12 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:19<00:05, 20.70 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/BUB1B.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.52 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 19.86 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.38 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.85 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.40 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.23 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.67 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 27.66 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 24.99 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 23.53 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.64 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.14 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RSRC2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.72 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.57 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.48 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:13, 28.43 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 29.07 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 29.31 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 29.59 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 29.70 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 29.76 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 30.04 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 30.02 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:12<00:04, 30.25 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SNRPA1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.06 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.55 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.95 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.33 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.28 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.12 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.04 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.33 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.13 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 24.01 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.97 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.43 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/GLE1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.50 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.31 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.34 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.28 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.15 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.39 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.23 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.84 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.10 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.55 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.75 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.72 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/UROD.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.82 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 19.84 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 23.84 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.51 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.86 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 22.03 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.26 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.80 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 26.95 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 27.80 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 25.62 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 25.12 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MRPL3.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.83 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.21 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.74 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.26 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.15 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.12 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.33 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.25 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 23.46 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:07, 24.14 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 25.78 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:04, 26.98 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/AAMP.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.04 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.38 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.11 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.36 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.28 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.67 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.79 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.17 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.32 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 22.70 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 24.56 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.37 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TUBD1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.73 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.40 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.08 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.01 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.99 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.29 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.15 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.68 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.17 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.53 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.87 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.85 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DDX46.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.63 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.08 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.68 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.55 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.22 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.06 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.10 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 24.94 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 23.50 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.48 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:07, 21.91 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.16 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NCAPG2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.89 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.25 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.02 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.18 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.31 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 25.95 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.24 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.32 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 23.70 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.87 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.37 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.88 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/HCFC1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.96 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.02 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.61 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.25 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.14 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.11 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 29.96 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:09, 26.66 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:08, 24.44 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:07, 23.28 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 23.27 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 25.03 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SF3B2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.99 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.56 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:20, 20.16 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.43 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.48 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.03 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 27.94 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.64 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.20 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.54 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.82 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.95 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CDC45.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.85 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.11 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.85 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.98 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.84 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.32 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.26 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.86 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.25 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.01 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.80 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.13 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/AARS2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.86 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.38 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.57 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.97 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.00 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 20.90 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 20.83 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 20.72 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.85 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:15<00:08, 21.43 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 21.96 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.66 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/BRD8.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:27, 16.91 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.49 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 23.40 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.72 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.91 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.32 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 22.08 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.83 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.46 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.31 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 21.19 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.11 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PDCD2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.92 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.37 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.95 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.75 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.09 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 25.84 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.03 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 27.33 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.52 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.22 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.72 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.14 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ZRSR2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.83 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.42 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.06 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 24.96 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:14, 23.75 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 23.67 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 25.37 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.83 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.80 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.57 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.18 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.42 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MRPL4.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.45 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.18 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 23.90 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.90 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 21.91 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.52 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.42 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.58 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.27 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 26.56 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.38 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 25.45 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/LIN54.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.03 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.30 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.15 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.23 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.00 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.14 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.22 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.86 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:09, 21.93 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.74 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.45 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.37 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DTYMK.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.98 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.54 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.82 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.48 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.29 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.54 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.46 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.14 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.57 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.78 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 30.06 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 27.17 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PCF11.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.01 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.50 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.38 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 23.23 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 22.29 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.93 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 24.84 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 26.37 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 27.44 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.22 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.84 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.21 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PSMC5.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.11 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:17, 24.47 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:14, 27.30 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 28.86 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 29.32 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 29.70 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 29.85 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 30.05 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 30.30 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 30.30 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 30.39 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:12<00:04, 30.46 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CPNE7.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.89 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.54 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.30 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.62 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 22.62 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:14, 21.99 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.66 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.34 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.23 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.13 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.91 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 23.84 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TUBA1B.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.00 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.49 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 22.08 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.10 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.92 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.45 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.51 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.38 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.04 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.53 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.88 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 30.08 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/USP36.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.77 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.26 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.56 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 29.80 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.19 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 30.07 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 29.92 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 30.07 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 30.09 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 30.18 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:11<00:05, 30.23 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 26.77 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CDC6.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.47 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 19.99 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.48 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 20.79 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.70 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.46 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:13, 21.35 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:12<00:11, 21.08 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 20.89 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:15<00:08, 21.32 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 21.19 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.08 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/TRMT5.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.97 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.45 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.70 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.44 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.25 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.70 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.74 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 25.60 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 23.95 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.91 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:06, 22.38 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.96 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/HSP90B1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.16 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.47 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.33 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.53 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 21.11 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.24 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.84 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.98 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.68 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 27.10 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 28.06 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 24.86 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NKAP.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.81 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.16 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.56 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.01 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 23.48 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 25.36 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 23.93 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 22.80 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.44 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:07, 24.03 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:06, 22.93 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 22.24 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/DNAJC19.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.00 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:22, 19.73 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 23.68 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.24 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.55 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.45 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 25.58 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.90 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:09, 23.29 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 25.11 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 26.58 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 27.57 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ELP5.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.87 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.43 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.38 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.42 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.55 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.37 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 29.09 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.47 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.86 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:07, 25.23 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:06, 23.84 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:05, 22.86 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/NCAPG.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.91 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.25 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.06 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.05 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 22.67 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.23 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.96 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:11, 21.49 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:13<00:10, 21.25 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:14<00:08, 21.02 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:16<00:07, 21.12 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:17<00:05, 21.10 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/STX18.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 19.02 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.55 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.82 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.63 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 26.44 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.74 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.54 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.11 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.24 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.82 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.95 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 26.74 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/WDR1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:18, 25.84 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:02<00:15, 28.80 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:03<00:13, 29.88 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:04<00:12, 30.46 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:05<00:11, 30.42 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:06<00:10, 28.90 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:07<00:09, 29.33 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:08<00:08, 29.76 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:09<00:07, 29.78 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:10<00:06, 30.03 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 27.06 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 25.53 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PSMD1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.77 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.36 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:16, 24.33 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 26.54 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:12, 27.60 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:10, 28.34 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:09, 28.82 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 29.27 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.53 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.72 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:12<00:05, 29.80 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:13<00:04, 29.94 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/FAM32A.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.83 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.50 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.23 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.75 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.18 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 25.94 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.44 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.37 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 29.03 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.40 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.80 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.99 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/HSD17B10.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.72 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.56 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:17, 23.04 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:14, 25.68 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:14, 23.73 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:13, 22.65 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:12, 22.34 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 24.23 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:08, 25.89 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 26.99 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 27.69 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.37 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/ATP6V1C1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.97 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.01 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.79 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:16, 22.66 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 24.88 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.49 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.67 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.52 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 29.18 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.38 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 28.14 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 28.61 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/RPS11.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:25, 18.62 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.32 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.96 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.25 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:16, 20.98 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:09<00:14, 21.03 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:10<00:12, 21.77 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:11<00:10, 23.78 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:08, 25.58 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:06, 26.84 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:14<00:05, 27.81 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:15<00:04, 28.48 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/CWC22.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.88 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.35 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.11 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:16, 21.89 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:14, 24.28 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:11, 25.98 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 27.11 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:08, 28.32 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 29.03 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.38 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.75 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 29.94 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/MRPL21.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.82 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.26 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.06 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.18 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.98 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.09 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 26.80 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 27.79 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.53 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 28.99 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.30 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 26.08 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/SRRM2.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.85 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.30 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:18, 21.41 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 24.30 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 26.20 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 27.42 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:11, 25.05 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:10, 23.79 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:12<00:09, 22.80 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:13<00:08, 22.09 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:15<00:07, 21.64 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:16<00:05, 21.45 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/PTPMT1.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.98 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.47 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 21.11 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:06<00:17, 21.46 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:07<00:15, 22.08 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:08<00:12, 24.40 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:09<00:10, 26.03 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:10<00:09, 27.32 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:11<00:07, 28.33 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:12<00:06, 29.06 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 29.48 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:04, 26.66 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/EIF3B.dataset
Creating dataset.



Map (num_proc=16):   0%|          | 0/500 [00:00<?, ? examples/s][A
Map (num_proc=16):   6%|▋         | 32/500 [00:01<00:24, 18.87 examples/s][A
Map (num_proc=16):  13%|█▎        | 64/500 [00:03<00:21, 20.41 examples/s][A
Map (num_proc=16):  19%|█▉        | 96/500 [00:04<00:19, 20.33 examples/s][A
Map (num_proc=16):  26%|██▌       | 128/500 [00:05<00:15, 23.48 examples/s][A
Map (num_proc=16):  32%|███▏      | 159/500 [00:06<00:13, 25.46 examples/s][A
Map (num_proc=16):  38%|███▊      | 190/500 [00:07<00:11, 26.75 examples/s][A
Map (num_proc=16):  44%|████▍     | 221/500 [00:08<00:10, 27.85 examples/s][A
Map (num_proc=16):  50%|█████     | 252/500 [00:09<00:08, 28.57 examples/s][A
Map (num_proc=16):  57%|█████▋    | 283/500 [00:10<00:07, 29.09 examples/s][A
Map (num_proc=16):  63%|██████▎   | 314/500 [00:11<00:06, 29.36 examples/s][A
Map (num_proc=16):  69%|██████▉   | 345/500 [00:13<00:05, 26.10 examples/s][A
Map (num_proc=16):  75%|███████▌  | 376/500 [00:14<00:05, 24.19 

Saved tokenized_dataset to /data/scratch/bty416/scFMs/data/tokenized_data/k562_pert/WDR24.dataset





In [None]:
# Embed every tokenized K562 perturbation dataset.
# embed_pert_data is defined elsewhere in this notebook; judging by its log
# output it reuses embeddings that were already generated and adds them to the
# dict it is given. NOTE(review): confirm against its definition.
k562_all_perts_embex = {}

# Columns of the mask whose column-sum is zero.
# NOTE(review): presumably genes that are not expressed in K562 -- confirm
# against how k562_mask is built.
k562_non_exp_gene = k562_mask.columns[k562_mask.sum() == 0]

# Build both directories from d_path so the data root is configured in one
# place (the notebook's path-definition cell) instead of being repeated here.
k562_tokenized_dir = f"{d_path}/tokenized_data/k562_pert"
k562_out_dir = f"{d_path}/embeddings/perts/k562"

k562_all_perts_embex = embed_pert_data(
    k562_perts,
    k562_all_perts_embex,
    k562_non_exp_gene,
    k562_tokenized_dir,
    k562_out_dir,
    dataset='k562',
)

Embedding for RPS27A already generated. Adding to dict ...
Embedding for UTP15 already generated. Adding to dict ...
Embedding for NEDD1 already generated. Adding to dict ...
Embedding for PFDN5 already generated. Adding to dict ...
Embedding for TRMT10C already generated. Adding to dict ...
Embedding for PSMB1 already generated. Adding to dict ...
Embedding for PSMC6 already generated. Adding to dict ...
Embedding for RPS9 already generated. Adding to dict ...
Embedding for CHMP2A already generated. Adding to dict ...
Embedding for ERAL1 already generated. Adding to dict ...
Embedding for BOP1 already generated. Adding to dict ...
Embedding for ACIN1 already generated. Adding to dict ...
Embedding for CEP97 already generated. Adding to dict ...
Embedding for C7orf26 already generated. Adding to dict ...
Embedding for SYF2 already generated. Adding to dict ...
Embedding for RNF123 already generated. Adding to dict ...
Embedding for SPCS2 already generated. Adding to dict ...
Embedding 

In [48]:
# Number of entries collected in the K562 embedding dict
len(k562_all_perts_embex)

1866

In [62]:
# Tabulate the embedding shapes instead of printing one line per entry: the
# dict holds well over a thousand embeddings, which floods the cell output.
# NOTE(review): assumes every embedding is 2-D, presumably
# (cells, embedding dimension) -- confirm.
k562_emb_shapes = pd.DataFrame(
    [emb.shape for emb in k562_all_perts_embex.values()],
    index=list(k562_all_perts_embex),
    columns=['n_rows', 'n_cols'],
)

# Compact summary (count, min, max, quartiles) of both shape dimensions
k562_emb_shapes.describe()

(500, 256)
(3, 256)
(500, 256)
(94, 256)
(500, 256)
(92, 256)
(5, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(111, 256)
(500, 256)
(500, 256)
(8, 256)
(500, 256)
(334, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(292, 256)
(500, 256)
(500, 256)
(500, 256)
(7, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(425, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(158, 256)
(500, 256)
(416, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(78, 256)
(500, 256)
(500, 256)
(43, 256)
(500, 256)
(500, 256)
(271, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(500, 256)
(104, 256)
(10, 256)
(27, 256)
(500, 256)
(47, 256)
(38, 256)
(500, 256)
(500, 256)
(189