forked from dence/trichy_scripts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
assemble_fastqs.py
executable file
·84 lines (66 loc) · 2.39 KB
/
assemble_fastqs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#Daniel Ence
#01/25/2016
#read in the taxonomer output
#put read IDs in a hash, mapped to the seqID to which they were classified
#select those reads from a fastq (use external script??)
#output to new fastq
#Assemble the new fastq with velvet (velveth, then velvetg)
#arguments to take in:
#taxonomer output
#fastqfile
#prefix for outputs/intermediate files
#
import argparse
import sys
import os
import subprocess
from Bio import SeqIO
import re
import multiprocessing as mp
import glob
import copy_reg
import types
import pdb
def main(outputDirectory,cpus):
tmp = assemble_fastqs(outputDirectory)
tmp.assemble_batch_fastqs(cpus)
#author: Sri's answer in this thread
#http://stackoverflow.com/questions/11726809/python-efficient-workaround-for-multiprocessing-a-function-that-is-a-data-membe
def _reduce_method(meth):
return (getattr,(meth.__self__,meth.__func__.__name__))
copy_reg.pickle(types.MethodType,_reduce_method)
class assemble_fastqs():
def __init__(self, outputDirectory):
self.outputDirectory = outputDirectory
def assemble_batch_fastqs(self,cpus):
print "here outputDirectory is:\t" + self.outputDirectory
print "here cpus is:\t" + cpus
#get a list of files in the output directory
fastqs = glob.glob( self.outputDirectory + "/*classified_reads.fastq")
#fastqs = glob.glob("outputDirectory" + "/")
#print "fastqs is:\t"
#print fastqs
#make a list of command duples
#run N(=cpus) number of assembly procs
pool = mp.Pool(processes=int(cpus))
pool.map(self.assemble_fastq, fastqs)
#map(self.assemble_fastq, fastqs)
def assemble_fastq(self, fastq):
print "running velvet for " + fastq + "\n"
velvetDir = fastq + "_velvet"
velvetBin = "/home/dence/applications/velvet/"
mkdir_command = "mkdir " + velvetDir
if not os.path.exists(velvetDir):
print mkdir_command
os.mkdir(velvetDir)
if not os.path.exists(velvetDir + "/" + "contigs.fa"):
velvetH_command = velvetBin + "/" + "velveth" + " " + velvetDir + " 29 " + " -fastq " + fastq
print velvetH_command
subprocess.call(velvetH_command, shell=True)
velvetG_command = velvetBin + "/" + "velvetg" + " " + velvetDir + " -cov_cutoff auto -exp_cov auto "
print velvetG_command
subprocess.call(velvetG_command,shell=True)
else:
"Finished running velvet for:\t" + velvetDir + "/" + "contigs.fa"
if __name__ == "__main__":
main(sys.argv[1],sys.argv[2])