In [15]:
import config_readwrite as crw
import glob
import numpy as np
import os, sys
import subprocess as sp
from time import sleep

BUILD= "hs1" #"rhemac10"# "hs1" #"hg38"

PATH = f"/wynton/home/ahituv/fongsl/dna/{BUILD}/"
LEN = "16"
N_ORDER_MUTS ="1"

section = LEN + "mers"
config_tag = f"config.{BUILD}.pool.ini"
cfn_full = os.path.join("/wynton/home/ahituv/fongsl/nullomers/bin-generate", config_tag)
config, cfn = crw.read(config_tag)


# functiona

In [16]:
def nFilesNBatches(array):
    """
    return count of N files in array, lowest number of batches to run. 
    
    input 
        array (str) - full path to array from config
        
    require 
        numpy as np. 
    
    method
        1. count number of files in array
        2. if NFILES > 4, determine batch size
        3. determine number of batches (between 5 and 23) as lowest common denominator to run.
            # 23 = arbitrary prime number. For np.arange, must add one. 
        4. else, NFILES <=4, so run all the jobs at once (i.e. NBATCHES =1) 
        
    return 
        NFILES (int) - count of array files
        NBATCHES(int) - lowest number of batches to run
    """
    
    #1
    NFILES = sum(1 for i in open(array, "r"))
    
    #2
    if NFILES > 4:
    
        NBATCHES = 0
        
        #3
        for n in np.arange(5,23+1):
            if NFILES%n ==0:
                NBATCHES = n
    #4                        
    else:
        NBATCHES = 1
        
    return NFILES, NBATCHES

# set up
## config

In [17]:
cmd = [
    "python",
    "/wynton/home/ahituv/fongsl/nullomers/bin-generate/write_config-pool.py",
    BUILD,
    LEN
]
sp.call(" ".join(cmd), shell=True)
print(" ".join(cmd))

/wynton/home/ahituv/fongsl/dna/hs1/kmers/16mers/
made config -  /wynton/home/ahituv/fongsl/nullomers/bin-generate/config.hs1.pool.ini
python /wynton/home/ahituv/fongsl/nullomers/bin-generate/write_config-pool.py hs1 16


## pool kmer array

In [18]:
cmd = ["python",
       "/wynton/home/ahituv/fongsl/nullomers/bin-generate/pool_array.py",
       cfn,
       PATH,
       BUILD,
       LEN,
       N_ORDER_MUTS
       ]
sp.call(" ".join(cmd), shell=True)
" ".join(cmd)

wrote pool array /wynton/home/ahituv/fongsl/nullomers/bin-generate/arrays/array-hs1.16mers.5mer.1.tsv


'python /wynton/home/ahituv/fongsl/nullomers/bin-generate/pool_array.py config.hs1.pool.ini /wynton/home/ahituv/fongsl/dna/hs1/ hs1 16 1'

## batch size

In [19]:
# read config to get array
config, cfn = crw.read(config_tag)

ARRAY = config[section]["array"]
NFILES, NBATCHES = nFilesNBatches(ARRAY)
NFILES, NBATCHES

(103, 0)

# kmer/nullomer maker

## run

In [17]:
cmd = [
    f"qsub -t 1-{NFILES}:1",
    f"-tc {NBATCHES}",
    "/wynton/home/ahituv/fongsl/nullomers/bin-generate/kmers.sh",
    ARRAY
]
sp.call(" ".join(cmd), shell=True)
print(" ".join(cmd))

Your job-array 2485511.1-26:1 ("kmers.sh") has been submitted
qsub -t 1-26:1 -tc 13 /wynton/home/ahituv/fongsl/nullomers/bin-generate/kmers.sh /wynton/home/ahituv/fongsl/nullomers/bin-generate/arrays/array-hg38.11mers.4.tsv


## rerun any job number in array

In [None]:
"""
RE RUN JOB NUMBERS ONLY. NOT CHROMOSOME NUMBERS.  
"""

RERUNS = ["13"]

for RUN in RERUNS:
    cmd = [
        "qsub",
        "/wynton/home/ahituv/fongsl/nullomers/bin-generate/kmers-rerun.sh",
        RUN,
        ARRAY,

    ]
    sp.call(" ".join(cmd), shell=True)
" ".join(cmd)
print(" ".join(cmd))

# mutagenesis

## run

In [20]:
cmd = [f"qsub -t 1-{NFILES}:1 -tc {NBATCHES}",
       "/wynton/home/ahituv/fongsl/nullomers/bin-generate/mutagenize-array-pool.sh",
       ARRAY,
       ]
print(" ".join(cmd))
sp.call(" ".join(cmd), shell=True)

qsub -t 1-103:1 -tc 0 /wynton/home/ahituv/fongsl/nullomers/bin-generate/mutagenize-array-pool.sh /wynton/home/ahituv/fongsl/nullomers/bin-generate/arrays/array-hs1.16mers.5mer.1.tsv
Your job-array 3447428.1-103:1 ("mutagenize-array-pool.sh") has been submitted


0

## rerun 
 
 1 run per SGE job


In [None]:
"""
RERUN MUTAGENESIS ONLY
"""

rerun = [] #"1"
for run in rerun:
    cmd = ["qsub", 
      "/wynton/home/ahituv/fongsl/nullomers/bin-generate/rerun-mutagenize-array.sh", 
        run,
        LEN,
        N_ORDER_MUTS,

      ]
    print(" ".join(cmd))
    sp.call(" ".join(cmd), shell=True)


# concatenate all the n-order files for different keys

In [None]:
ORDER_OUT = os.path.join(PATH, f"nullomers.{LEN}mers.order{N_ORDER_MUTS}.tsv")
cmd = ["cat", os.path.join(PATH,"kmers", f"{LEN}mers", "order.*.tsv"), ">", ORDER_OUT]
print(" ".join(cmd))
sp.call(" ".join(cmd), shell=True)

config[section][f"order{N_ORDER_MUTS}"] = ORDER_OUT
crw.write(config, cfn)