In [2]:
import config_readwrite as crw
import glob
import numpy as np
import os, sys
import subprocess as sp
from time import sleep

BUILD= "hs1" #"rhemac10"# "hs1" #"hg38"


PATH = f"/wynton/home/ahituv/fongsl/dna/{BUILD}/"
LEN = "11"
KEYSIZE="4"
N_ORDER_MUTS ="1"

section = LEN + "mer"
config_tag = f"config.{BUILD}.ini"
config, cfn = crw.read(config_tag)

In [3]:
size = int(KEYSIZE)
print(4**size, 4**(23-size))

256 274877906944


# functiona

In [4]:
def nFilesNBatches(array):
    """
    return count of N files in array, lowest number of batches to run. 
    
    input 
        array (str) - full path to array from config
        
    require 
        numpy as np. 
    
    method
        1. count number of files in array
        2. if NFILES > 4, determine batch size
        3. determine number of batches (between 5 and 23) as lowest common denominator to run.
            # 23 = arbitrary prime number. For np.arange, must add one. 
        4. else, NFILES <=4, so run all the jobs at once (i.e. NBATCHES =1) 
        
    return 
        NFILES (int) - count of array files
        NBATCHES(int) - lowest number of batches to run
    """
    
    #1
    NFILES = sum(1 for i in open(array, "r"))
    
    #2
    if NFILES > 4:
    
        NBATCHES = 0
        
        #3
        for n in np.arange(5,23+1):
            if NFILES%n ==0:
                NBATCHES = n
    #4                        
    else:
        NBATCHES = 1
        
    return NFILES, NBATCHES

# set up
## config

In [5]:
cmd = [
        "python", 
        "/wynton/home/ahituv/fongsl/nullomers/bin-generate/write_config.py",
        BUILD,
        LEN, 
        KEYSIZE,
      ]
sp.call(" ".join(cmd), shell=True)
print(" ".join(cmd))

/wynton/home/ahituv/fongsl/dna/hs1/kmers/11mers/
python /wynton/home/ahituv/fongsl/nullomers/bin-generate/write_config.py hs1 11 4


## chromosome array

In [6]:
cmd = [
        "python", 
        "/wynton/home/ahituv/fongsl/nullomers/bin-generate/make_chr_array.py",
        PATH,
        BUILD,
        config_tag
      ]
sp.call(" ".join(cmd), shell=True)
print(" ".join(cmd))

python /wynton/home/ahituv/fongsl/nullomers/bin-generate/make_chr_array.py /wynton/home/ahituv/fongsl/dna/hs1/ hs1 config.hs1.ini


## nullomer array

In [7]:
cmd = ["python", 
      "/wynton/home/ahituv/fongsl/nullomers/bin-generate/make_null_array.py", 
        config_tag,
        PATH
        BUILD,
        LEN,
        KEYSIZE
      ]
sp.call(" ".join(cmd), shell=True)
" ".join(cmd)

256


'python /wynton/home/ahituv/fongsl/nullomers/bin-generate/make_null_array.py config.hs1.ini hs1 11 4'

# kmer maker

## batch size

In [7]:
# read config to get array
config, cfn = crw.read(config_tag)

ARRAY = config["ARRAY"]["FA_CHR"]
NFILES, NBATCHES = nFilesNBatches(ARRAY)
NFILES, NBATCHES 

(25, 5)

## run

In [9]:
cmd = [
        f"qsub -t 1-{NFILES}:1",
        #f"-tc {NBATCHES}", 
        "/wynton/home/ahituv/fongsl/nullomers/bin-generate/get_kmer-keys-array.sh",
        ARRAY,
        PATH,
        LEN, 
        KEYSIZE,
      ]
sp.call(" ".join(cmd), shell=True)
print(" ".join(cmd))

Your job-array 2306437.1-25:1 ("get_kmer-keys-array.sh") has been submitted
qsub -t 1-25:1 /wynton/home/ahituv/fongsl/nullomers/bin-generate/get_kmer-keys-array.sh /wynton/home/ahituv/fongsl/nullomers/bin-generate/arrays/chr_fa_array-hs1.tsv /wynton/home/ahituv/fongsl/dna/hs1/ 23 7


## rerun any job number in array

In [None]:
#DONE =["21", "25", "22", "20", "19", "23", "16", "12", "14", "13", "10", "8", "6", "9", "7", "3", "2", "1", "4", "5"] #"1"
RERUNS = ["11", "15"]#np.arange(1,25)

In [None]:
for RUN in RERUNS:
    cmd = [
        "qsub", 
        "/wynton/home/ahituv/fongsl/nullomers/bin-generate/rerun-get_kmer-keys-array.sh",
        RUN,
        ARRAY,
        PATH,
        LEN, 
        KEYSIZE
      ]
    #sp.call(" ".join(cmd), shell=True)
" ".join(cmd)
print(" ".join(cmd))

# nullomer maker

## batch size

In [9]:
MUT_ARRAY = config[f"{LEN}mer"]["array"]
NFILES, NBATCHES = nFilesNBatches(MUT_ARRAY)
NFILES, NBATCHES

(1024, 16)

## run

In [11]:
## sleep(60*60)

cmd = [
    
       f"qsub -t 1-{NFILES}:1",
       #f"-tc {NBATCHES}", 
      "/wynton/home/ahituv/fongsl/nullomers/bin-generate/get_nullomer-keys-array.sh", 
        MUT_ARRAY,
        PATH,
        LEN,
        KEYSIZE
      ]
sp.call(" ".join(cmd), shell=True)
print(" ".join(cmd))

Your job-array 2385330.1-1024:1 ("get_nullomer-keys-array.sh") has been submitted
qsub -t 1-1024:1 /wynton/home/ahituv/fongsl/nullomers/bin-generate/get_nullomer-keys-array.sh /wynton/home/ahituv/fongsl/nullomers/bin-generate/arrays/array-hs1.12mer.tsv /wynton/home/ahituv/fongsl/dna/hs1/ 12 5


## rerun

In [None]:
RERUNS =["1"] #"1"
for RUN in RERUNS:
    cmd = [
        "qsub", 
        "/wynton/home/ahituv/fongsl/nullomers/bin-generate/rerun-get_nullomer-keys-array.sh",
        RUN,
        MUT_ARRAY,
        PATH,
        LEN, 
        KEYSIZE
      ]
    sp.call(" ".join(cmd), shell=True)
" ".join(cmd)
print(" ".join(cmd))

# combine kmer spectra. 
    No need to combine nullomer spectra. Can keep in sections (ALL.AAA.3mers-nullomers.csv.gz)

# mutagenesis

In [None]:
    os.chdir(OUTDIR)

    # concat KMERS
    os.system(f"cat ALL.*.{WINDOW_SIZE}mers.csv > ALL.{WINDOW_SIZE}mers-kmers.csv && gzip ALL.{WINDOW_SIZE}mers-kmers.csv")

    # concat nullomers
    #os.system(f"cat ALL.*.{WINDOW_SIZE}mers-nullomers.csv > ALL.{WINDOW_SIZE}mers-nullomers.csv && gzip ALL.{WINDOW_SIZE}mers-nullomers.csv")
    
    # zip supporting files. 
    os.system(f"gzip ALL.*.{WINDOW_SIZE}mers.csv")
    os.system(f"gzip ALL.*.{WINDOW_SIZE}mers-nullomers.csv")

## run

In [None]:
N_ORDER_MUTS ="1"

cmd = [f"qsub -t 1-{NFILES}:1 -tc {NBATCHES}", 
      "/wynton/home/ahituv/fongsl/nullomers/bin-generate/mutagenize-array.sh", 
        LEN,
        N_ORDER_MUTS,
        config_tag
       
      ]
sp.call(" ".join(cmd), shell=True)

## rerun 
 
 1 run per SGE job


In [None]:
rerun = [] #"1"
for run in rerun:
    cmd = ["qsub", 
      "/wynton/home/ahituv/fongsl/nullomers/bin-generate/rerun-mutagenize-array.sh", 
        run,
        LEN,
        N_ORDER_MUTS,
        config_tag
      ]
    print(" ".join(cmd))
    sp.call(" ".join(cmd), shell=True)


# concatenate all the n-order files for different keys

In [None]:
ORDER_OUT = os.path.join(PATH, f"nullomers.{LEN}mers.order{N_ORDER_MUTS}.tsv")
cmd = ["cat", os.path.join(PATH,"kmers", f"{LEN}mers", "order.*.tsv"), ">", ORDER_OUT]
print(" ".join(cmd))
sp.call(" ".join(cmd), shell=True)

config[section][f"order{N_ORDER_MUTS}"] = ORDER_OUT
crw.write(config, cfn)

In [12]:
l = [1,4,6]

In [13]:
l.remove(3)

ValueError: list.remove(x): x not in list