# Build Mutagenesis Libraries with SEAM
For each sequence in the activity library, generate a random mutagenesis library with squid's `RandomMutagenesis` and save it as one HDF5 file per sequence.

**Kernel**: Use SEAM_revisions venv (`/grid/wsbs/home_norepl/pmantill/SEAM_revisions/SEAM_revisions/.venv/`)

In [1]:
import os
import numpy as np
import pandas as pd
import h5py
from pathlib import Path
import squid

2026-02-28 17:23:11.109459: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI AVX512_BF16 AVX_VNNI AMX_TILE AMX_INT8 AMX_BF16 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2026-02-28 17:23:13.348220: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2026-02-28 17:23:14.353075: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /cm/local/apps/gcc/9.2.0/lib:/cm/lo

In [2]:
# Config
MODEL_NAME = "second_2stageig"  # which activity library to use
LIB_SIZE = 25000       # mutants per sequence
MUT_RATE = 0.10        # 10% mutation rate
SEQ_LENGTH = 230       # enhancer length (no promoter/barcode)
SEED = 42

# Root directory of the activity libraries. It defaults to the original cluster
# location, but can be redirected by setting the LENTIMPRA_ACTIVITY_LIB_DIR
# environment variable before starting the kernel, so the notebook can run on
# another machine without editing this cell.
_DEFAULT_ACTIVITY_LIB_DIR = "/grid/wsbs/home_norepl/pmantill/LentiMPRA_mcs/LentiMoCon/lenti_AGFT/motif_context_swap/library_prep/activity_libraries"
ACTIVITY_LIB_DIR = Path(os.environ.get("LENTIMPRA_ACTIVITY_LIB_DIR", _DEFAULT_ACTIVITY_LIB_DIR))
ACTIVITY_LIB_PATH = ACTIVITY_LIB_DIR / MODEL_NAME / f"k562_activity_library_{MODEL_NAME}.csv"

# Check the input before creating anything: with parents=True, mkdir would otherwise
# build a stray directory tree for a mistyped MODEL_NAME or a wrong root directory.
if not ACTIVITY_LIB_PATH.is_file():
    raise FileNotFoundError(f"Activity library not found: {ACTIVITY_LIB_PATH}")

OUT_DIR = ACTIVITY_LIB_DIR / MODEL_NAME / "mutagenesis_libraries"
OUT_DIR.mkdir(parents=True, exist_ok=True)

print(f"Model: {MODEL_NAME}")
print(f"Mutants per seq: {LIB_SIZE}, mutation rate: {MUT_RATE}")
print(f"Output: {OUT_DIR}")

Model: second_2stageig
Mutants per seq: 25000, mutation rate: 0.1
Output: /grid/wsbs/home_norepl/pmantill/LentiMPRA_mcs/LentiMoCon/lenti_AGFT/motif_context_swap/library_prep/activity_libraries/second_2stageig/mutagenesis_libraries


In [3]:
# Load activity library
lib_df = pd.read_csv(ACTIVITY_LIB_PATH)
print(f"Loaded {len(lib_df)} sequences from {ACTIVITY_LIB_PATH.name}")

# Fail fast if a column used later in this notebook is absent, instead of hitting a
# KeyError part-way through library generation.
REQUIRED_COLUMNS = ("seq_id", "activity_bin", "sequence", "actual_activity", "split", "fold")
missing_cols = [col for col in REQUIRED_COLUMNS if col not in lib_df.columns]
assert not missing_cols, f"{ACTIVITY_LIB_PATH.name} is missing required columns: {missing_cols}"

print(f"Bins: {lib_df['activity_bin'].value_counts().to_dict()}")

# Verify all sequences have the configured enhancer length (SEQ_LENGTH)
seq_lens = lib_df["sequence"].str.len()
assert (seq_lens == SEQ_LENGTH).all(), f"Expected all seqs {SEQ_LENGTH}bp, got: {seq_lens.describe()}"
print(f"All sequences are {SEQ_LENGTH}bp (enhancer only, no promoter/barcode)")

# Output files are named from (activity_bin, seq_id), and the generation loop skips rows
# whose file already exists, so a repeated pair would silently lose a sequence.
# This is a warning rather than an assert, so a previously working run is not blocked.
dup_rows = lib_df.duplicated(subset=["activity_bin", "seq_id"], keep=False)
if dup_rows.any():
    print(f"WARNING: {int(dup_rows.sum())} rows share an (activity_bin, seq_id) pair; "
          "their output files would collide")

# Flag characters other than uppercase A/C/G/T so unusual symbols are a visible,
# deliberate choice rather than a silent surprise at encoding time.
odd_chars = lib_df["sequence"].str.contains(r"[^ACGT]", regex=True)
if odd_chars.any():
    print(f"WARNING: {int(odd_chars.sum())} sequences contain characters other than "
          "uppercase A/C/G/T; confirm the one-hot encoder handles them as intended")

Loaded 3000 sequences from k562_activity_library_second_2stageig.csv
Bins: {'high': 1000, 'mid': 1000, 'low': 1000}
All sequences are 230bp (enhancer only, no promoter/barcode)


In [4]:
# One-hot encoding (ACGT order)
ALPHA_MAP = {'A': 0, 'C': 1, 'G': 2, 'T': 3}

def str_to_onehot(seq_str):
    """Convert a nucleotide string into a one-hot matrix in A, C, G, T column order.

    Parameters
    ----------
    seq_str : str
        Nucleotide sequence. Matching is case-insensitive, so lowercase
        (e.g. soft-masked) bases are encoded like their uppercase counterparts.

    Returns
    -------
    numpy.ndarray
        float32 array of shape (len(seq_str), 4). Each row has a single 1.0 in the
        column given by ALPHA_MAP; symbols outside A/C/G/T (such as N) produce an
        all-zero row.
    """
    ohe = np.zeros((len(seq_str), 4), dtype=np.float32)
    for pos, base in enumerate(seq_str):
        # Upper-case one character at a time: upper-casing the whole string can change
        # its length for some Unicode characters and misalign positions.
        col = ALPHA_MAP.get(base.upper())
        if col is not None:
            ohe[pos, col] = 1.0
    # NOTE(review): all-zero rows for unknown symbols are kept from the original
    # behavior; confirm that downstream consumers treat them as intended.
    return ohe

# Build the random mutagenizer from the configured mutation rate and seed
mut_generator = squid.mutagenizer.RandomMutagenesis(seed=SEED, mut_rate=MUT_RATE)

# Report ceil(MUT_RATE * SEQ_LENGTH) as the approximate per-sequence mutation count
approx_mutations = int(np.ceil(MUT_RATE * SEQ_LENGTH))
print(f"Mutagenizer ready: ~{approx_mutations} mutations per sequence")

Mutagenizer ready: ~23 mutations per sequence


In [5]:
# Generate mutagenesis libraries for each sequence.
#
# One HDF5 file is written per library row, named "<activity_bin>_<seq_id>.h5".
# Rows whose file already exists are skipped, so an interrupted run can be resumed
# by re-running this cell.
n_total = len(lib_df)  # loop-invariant; only used in progress messages

# `i` is the 0-based position in lib_df rather than the index label, so the progress
# counter stays correct even if lib_df has been filtered, sorted or re-indexed.
for i, (_, row) in enumerate(lib_df.iterrows()):
    seq_id = row["seq_id"]
    activity_bin = row["activity_bin"]
    out_file = OUT_DIR / f"{activity_bin}_{seq_id}.h5"

    if out_file.exists():
        if (i + 1) % 100 == 0:
            print(f"  [{i+1}/{n_total}] {seq_id} — already exists, skipping")
        continue

    # Read the remaining fields before any expensive work so a bad or missing value
    # raises before mutagenesis runs and before any file is created.
    sequence = row["sequence"]
    actual_activity = row["actual_activity"]
    split = row["split"]
    fold = int(row["fold"])

    wt_onehot = str_to_onehot(sequence)
    x_mut = mut_generator(wt_onehot, num_sim=LIB_SIZE)

    # Write to a sibling temp file and rename it into place only after the HDF5 file
    # has been closed successfully. A crash or kernel interrupt mid-write therefore
    # never leaves a partial file at `out_file` that the skip check above would later
    # mistake for a finished library.
    # NOTE: files written by earlier versions of this cell were not protected this way;
    # delete any suspect file to force it to be regenerated.
    tmp_file = out_file.with_name(out_file.name + ".tmp")
    try:
        with h5py.File(tmp_file, 'w') as f:
            f.create_dataset('sequences', data=x_mut, dtype='float32',
                             compression='gzip', compression_opts=4)
            f.create_dataset('wt_sequence', data=wt_onehot, dtype='float32')
            f.attrs['seq_id'] = seq_id
            f.attrs['activity_bin'] = activity_bin
            f.attrs['actual_activity'] = actual_activity
            f.attrs['split'] = split
            f.attrs['fold'] = fold
            f.attrs['n_mutants'] = LIB_SIZE
            f.attrs['mut_rate'] = MUT_RATE
            f.attrs['seq_length'] = SEQ_LENGTH
            f.attrs['alphabet'] = 'ACGT'
        tmp_file.replace(out_file)
    finally:
        # After a successful rename the temp path no longer exists, so this only
        # removes leftovers from a failed or interrupted write.
        if tmp_file.exists():
            tmp_file.unlink()

    if (i + 1) % 100 == 0 or i == 0:
        print(f"  [{i+1}/{n_total}] {activity_bin}/{seq_id} — {x_mut.shape} saved")

print(f"\nDone! Generated mutagenesis libraries in {OUT_DIR}")

Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 23183.74it/s]


  [1/3000] high/ENSG00000111885_Reversed: — (25000, 230, 4) saved


Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 21422.58it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 22174.89it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 22237.83it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 20640.29it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 24372.63it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 21956.52it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 22445.86it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 23617.89it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 23449.18it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 24882.89it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 21605.69it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 22254.57it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 21597.13it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 20136.20it/s]
Mutagenesis: 100%|██

  [100/3000] high/peak54698 — (25000, 230, 4) saved


Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 23442.78it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 21722.65it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 21690.11it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 21209.15it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 21826.82it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 21844.61it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 20391.35it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 22985.98it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 24789.75it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 22488.22it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 22860.08it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 23245.00it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 21213.38it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 20918.14it/s]
Mutagenesis: 100%|██

  [200/3000] high/peak59732 — (25000, 230, 4) saved


Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 28977.31it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 25490.20it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 24856.57it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 28690.66it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 28295.28it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 25850.72it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 23330.62it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 25948.98it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 26541.99it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 28550.24it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 29464.62it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 27154.34it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 28149.85it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 29590.71it/s]
Mutagenesis: 100%|██

  [300/3000] high/peak36556 — (25000, 230, 4) saved


Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 21171.42it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:02<00:00, 10455.03it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 14950.80it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 14823.42it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 17655.30it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 18135.05it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 20027.68it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 22362.17it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 14049.97it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 13551.58it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 14390.04it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 13774.88it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 19034.12it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 15855.48it/s]
Mutagenesis: 100%|██

  [400/3000] high/ENSG00000182308 — (25000, 230, 4) saved


Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 17807.21it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 15844.86it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 17706.93it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 17934.20it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 19726.02it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 19071.69it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 19814.74it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 18896.60it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 15607.20it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 16924.05it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 16790.56it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 16274.91it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 19323.55it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 15860.66it/s]
Mutagenesis: 100%|██

  [500/3000] high/peak40086_Reversed: — (25000, 230, 4) saved


Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30569.10it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30776.31it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30658.09it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 24842.91it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30501.80it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30756.42it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30473.40it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30226.65it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 28756.86it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30638.90it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30750.64it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30756.27it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 27233.87it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 23757.41it/s]
Mutagenesis: 100%|██

  [600/3000] high/ENSG00000115524_Reversed: — (25000, 230, 4) saved


Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30589.14it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 25995.08it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 29343.59it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30775.51it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30425.61it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30661.00it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 22222.78it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30478.18it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30707.02it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30749.72it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 28777.32it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 29587.93it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30655.75it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30248.32it/s]
Mutagenesis: 100%|██

  [700/3000] high/ENSG00000010244_Reversed: — (25000, 230, 4) saved


Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 24506.21it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 24654.56it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30825.36it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30775.82it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30512.24it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 24798.14it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30639.93it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30494.30it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30533.94it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 25335.54it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 29366.11it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30643.64it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30786.36it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30933.14it/s]
Mutagenesis: 100%|██

  [800/3000] high/peak137 — (25000, 230, 4) saved


Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 24500.77it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30681.52it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30088.93it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30750.90it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30594.04it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 25109.29it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30245.63it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 25817.72it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30611.10it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30072.07it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 26195.79it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30594.71it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30788.29it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30841.68it/s]
Mutagenesis: 100%|██

  [900/3000] high/peak5406_Reversed: — (25000, 230, 4) saved


Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30748.93it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30507.22it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 25853.55it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30621.09it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30687.61it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30845.33it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30519.52it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 26816.31it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 26412.35it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30758.26it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30569.95it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30828.50it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 29065.94it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 25941.14it/s]
Mutagenesis: 100%|██

  [1000/3000] high/peak63908 — (25000, 230, 4) saved


Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 26128.96it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30553.78it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30951.79it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30643.19it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30617.26it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 25106.38it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 28451.36it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30731.68it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 31027.80it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30559.03it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30855.63it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 27309.35it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30437.81it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30539.08it/s]
Mutagenesis: 100%|██

  [1100/3000] mid/peak69426_Reversed: — (25000, 230, 4) saved


Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 31028.82it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30883.39it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 28386.29it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 29777.20it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 23730.31it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30568.21it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30738.46it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30920.52it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30524.06it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 24893.51it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30845.35it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30274.72it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 29151.88it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30516.31it/s]
Mutagenesis: 100%|██

  [1200/3000] mid/peak63178_Reversed: — (25000, 230, 4) saved


Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30847.52it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30715.34it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30273.72it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 24759.90it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30826.04it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30805.53it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30862.44it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30769.42it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 23957.59it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30740.04it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30619.80it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30904.28it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30774.26it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 24646.06it/s]
Mutagenesis: 100%|██

  [1300/3000] mid/peak27831_Reversed: — (25000, 230, 4) saved


Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30876.74it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30802.97it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30780.11it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 24623.16it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30617.31it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30443.73it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30745.40it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30932.66it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 24299.30it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30917.56it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30970.91it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30723.68it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30543.28it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 23549.96it/s]
Mutagenesis: 100%|██

  [1400/3000] mid/peak62687_Reversed: — (25000, 230, 4) saved


Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30788.77it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30674.49it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30828.72it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 25002.34it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30004.44it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30930.01it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30964.69it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30722.30it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 27566.70it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 25750.51it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30805.28it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30611.46it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30934.03it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30767.66it/s]
Mutagenesis: 100%|██

  [1500/3000] mid/peak48789_Reversed: — (25000, 230, 4) saved


Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30563.55it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30621.33it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 29707.79it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 26784.10it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30846.49it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30630.19it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 29984.66it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 29887.75it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 27148.03it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30660.90it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30542.05it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30234.89it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 28222.86it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 29419.72it/s]
Mutagenesis: 100%|██

  [1600/3000] mid/peak21632 — (25000, 230, 4) saved


Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30024.06it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 24099.79it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 27977.48it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30805.75it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30674.40it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 26063.01it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 28400.18it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30812.31it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30669.93it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30455.11it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 29146.06it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 27922.18it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30220.65it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30717.36it/s]
Mutagenesis: 100%|██

  [1700/3000] mid/peak36504 — (25000, 230, 4) saved


Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30680.49it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30631.00it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 25331.66it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30781.98it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30704.98it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30799.03it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30663.79it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 24551.42it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30227.56it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30931.19it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30793.92it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30661.93it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 29161.50it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 31006.46it/s]
Mutagenesis: 100%|██

  [1800/3000] mid/peak60787_Reversed: — (25000, 230, 4) saved


Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 23112.66it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30549.72it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30686.20it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30731.28it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 29363.72it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 27577.01it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30570.08it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30739.04it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30447.87it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30093.15it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 29382.11it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30376.01it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30493.06it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30180.86it/s]
Mutagenesis: 100%|██

  [1900/3000] mid/ENSG00000162928_Reversed: — (25000, 230, 4) saved


Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30434.43it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30637.20it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 25355.34it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30134.24it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 25249.08it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 28187.48it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 23383.27it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 25901.85it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 24885.94it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30731.98it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30806.79it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30423.59it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30759.81it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 24140.87it/s]
Mutagenesis: 100%|██

  [2000/3000] mid/peak61546_Reversed: — (25000, 230, 4) saved


Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30865.40it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30659.79it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30712.40it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30687.01it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 27292.20it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30790.37it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30680.23it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 29756.22it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 28600.21it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30826.34it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30817.73it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30717.11it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30660.83it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 27988.58it/s]
Mutagenesis: 100%|██

  [2100/3000] low/peak74605 — (25000, 230, 4) saved


Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30753.11it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30973.68it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 24508.78it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30841.23it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30984.07it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30844.05it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30857.12it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 23714.03it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 31013.61it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30675.52it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30714.80it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30790.53it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 24019.65it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30601.87it/s]
Mutagenesis: 100%|██

  [2200/3000] low/peak49576_Reversed: — (25000, 230, 4) saved


Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30784.53it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30844.78it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 31030.83it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30660.87it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 25511.20it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30619.09it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30717.01it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 26395.51it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 29565.76it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30615.34it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30458.18it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30416.84it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30853.17it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 26482.08it/s]
Mutagenesis: 100%|██

  [2300/3000] low/peak86317_Reversed: — (25000, 230, 4) saved


Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30502.68it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30908.82it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30658.53it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 25949.66it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30930.20it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30849.29it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30917.44it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30045.74it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30662.40it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30554.35it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30948.09it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30746.31it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 25723.75it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30850.92it/s]
Mutagenesis: 100%|██

  [2400/3000] low/peak66648 — (25000, 230, 4) saved


Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30914.73it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30690.36it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30685.12it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 25996.66it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30328.98it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30694.55it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30769.15it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30009.69it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 28003.70it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30221.16it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 25359.95it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 26473.62it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 24640.34it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 29173.81it/s]
Mutagenesis: 100%|██

  [2500/3000] low/peak37366 — (25000, 230, 4) saved


Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30767.48it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30798.46it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30708.22it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30655.42it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 27787.11it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 29783.86it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30924.31it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30666.14it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30757.79it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 27310.88it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30561.57it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30874.89it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30943.09it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30670.19it/s]
Mutagenesis: 100%|██

  [2600/3000] low/peak73233_Reversed: — (25000, 230, 4) saved


Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 31016.19it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 29338.61it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 26687.33it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30770.29it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30520.96it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30568.30it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 28336.46it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30143.14it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30952.82it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30862.98it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30557.53it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 25343.68it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30689.43it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 31027.05it/s]
Mutagenesis: 100%|██

  [2700/3000] low/ENSG00000161973 — (25000, 230, 4) saved


Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 29125.69it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30997.07it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 31100.97it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30563.96it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 27093.40it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30922.91it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30611.86it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30543.61it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30499.56it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 23650.16it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 31021.55it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30988.26it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30925.80it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 31039.30it/s]
Mutagenesis: 100%|██

  [2800/3000] low/ENSG00000170634 — (25000, 230, 4) saved


Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30843.85it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30748.27it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30874.71it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 29937.89it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 23750.64it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30944.70it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 31092.86it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30847.18it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30625.27it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 25080.22it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30926.07it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30912.67it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30961.62it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 29676.89it/s]
Mutagenesis: 100%|██

  [2900/3000] low/peak85019_Reversed: — (25000, 230, 4) saved


Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30119.04it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:01<00:00, 24530.22it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30759.59it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30925.69it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30518.55it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30926.35it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 26145.19it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30611.05it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30704.49it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30773.11it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30294.06it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 26670.05it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30881.28it/s]
Mutagenesis: 100%|██████████| 25000/25000 [00:00<00:00, 30858.54it/s]
Mutagenesis: 100%|██

  [3000/3000] low/ENSG00000166971 — (25000, 230, 4) saved

Done! Generated mutagenesis libraries in /grid/wsbs/home_norepl/pmantill/LentiMPRA_mcs/LentiMoCon/lenti_AGFT/motif_context_swap/library_prep/activity_libraries/second_2stageig/mutagenesis_libraries


In [6]:
# Summary: sanity-check the mutagenesis libraries written to OUT_DIR
h5_files = sorted(OUT_DIR.glob("*.h5"))
print(f"Total H5 files: {len(h5_files)}")
print(f"Expected: {len(lib_df)}")

# Spot check: open only the first file (in sorted order) and report the
# shapes of its two datasets plus the attributes stored on the file root.
if len(h5_files) > 0:
    first_path = h5_files[0]
    with h5py.File(first_path, 'r') as h5:
        print(f"\nSpot check: {first_path.name}")
        print(f"  sequences shape: {h5['sequences'].shape}")
        print(f"  wt_sequence shape: {h5['wt_sequence'].shape}")
        print(f"  attrs: {dict(h5.attrs)}")

# Tally files per bin; the bin label is whatever precedes the first
# underscore in the file stem (e.g. "high_ENSG00000000460" -> "high").
bin_counts = {}
for h5_path in h5_files:
    label = h5_path.stem.partition('_')[0]
    if label not in bin_counts:
        bin_counts[label] = 0
    bin_counts[label] += 1
print(f"\nFiles per bin: {bin_counts}")

Total H5 files: 3000
Expected: 3000

Spot check: high_ENSG00000000460.h5
  sequences shape: (25000, 230, 4)
  wt_sequence shape: (230, 4)
  attrs: {'activity_bin': 'high', 'actual_activity': 1.331, 'alphabet': 'ACGT', 'fold': 10, 'mut_rate': 0.1, 'n_mutants': 25000, 'seq_id': 'ENSG00000000460', 'seq_length': 230, 'split': 'test'}

Files per bin: {'high': 1000, 'low': 1000, 'mid': 1000}
