-
Notifications
You must be signed in to change notification settings - Fork 24
/
dnase_to_javatreeview.py
executable file
·84 lines (71 loc) · 3.74 KB
/
dnase_to_javatreeview.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#!/usr/bin/env python
import argparse, pyDNase, csv, random
import numpy as np
from clint.textui import puts, progress
import warnings
parser = argparse.ArgumentParser(description='Writes a JavaTreeView file based on the regions in reads BED file and the reads in reads BAM file')
parser.add_argument("-w", "--window_size", help="Size of flanking area around centre of the regions to plot (default: 100)",default=100,type=int)
parser.add_argument("-i",action="store_true", help="Ignores strand information in BED file",default=False)
parser.add_argument("-o",action="store_true", help="Orders output the same as the input (default: orders by FOS)",default=False)
parser.add_argument("-a",action="store_true", help="Write absolute cut counts instead strand imbalanced counts",default=False)
parser.add_argument("-n",action="store_true", help="Normalise the cut data for each region between 0 and 1",default=False)
parser.add_argument("-c",action="store_true", help="Disable memory caching (low RAM mode)",default=False)
parser.add_argument("-b",action="store_true", help="Normalise for cutting bias",default=False)
parser.add_argument("-A",action="store_true", help="ATAC-seq mode (default: False)",default=False)
parser.add_argument("-bf", "--bias-file", help="Location of the sorted, index",default = None,type=str)
parser.add_argument("-r",action="store_true", help="Randomise the ordering of the output",default=False)
parser.add_argument("regions", help="BED file of the regions you want to generate the heatmap for")
parser.add_argument("reads", help="The BAM file containing the read data")
parser.add_argument("output", help="filename to write the CSV output to")
args = parser.parse_args()
reads = pyDNase.BAMHandler(args.reads,caching= not args.c, ATAC=args.A)
if args.b:
if args.bias_file != None:
freads = pyDNase.BAMHandlerWithBias(pyDNase.FASTAHandler(args.bias_file),args.reads,caching=not args.c, ATAC=args.A)
else:
raise ValueError("No FASTA file provided for bias correction!")
regions = pyDNase.GenomicIntervalSet(args.regions)
if args.i:
for each in regions:
each.strand = "+"
if not args.c:
puts("Caching reads...")
for i in progress.bar(regions):
reads[i]
if args.o:
sorter = lambda x : x.importorder
else:
if args.c:
warnings.warn("You've disabled memory caching, which can cause a ~2x slowdown when sorting by FOS")
sorter = lambda x : x.score
puts("Ordering intervals by FOS")
for i in progress.bar(regions):
i.score = reads.FOS(i)
puts("Resizing Regions to {0}".format(args.window_size))
regions.resizeRegions(toSize=args.window_size)
outfile = csv.writer(open(args.output,"w"),delimiter="\t")
#Writes the header file
outfile.writerow(["GID", "ID", "NAME"] + [i+1 for i in range(len(reads[regions[0]]["+"]))])
def normalise_cuts(cuts):
normalised_cuts = np.array(cuts,dtype="float")
normalised_cuts = ((normalised_cuts - normalised_cuts.min()) / (normalised_cuts.max() - normalised_cuts.min()))
return normalised_cuts
puts("Writing to JTV...")
if args.r:
# This is done instead of using a random key to sorted() as it is O(n), whereas sorted() is O(n log n)
region_iterator = [i for i in regions]
random.shuffle(region_iterator)
else:
region_iterator = sorted(regions, key = sorter)
for i in progress.bar(region_iterator):
cuts = reads[i]
if args.a:
newarray = np.add(cuts["+"], cuts["-"])
if args.n:
newarray = normalise_cuts(newarray)
else:
if args.n:
cuts["+"] = normalise_cuts(cuts["+"])
cuts["-"] = normalise_cuts(cuts["-"])
newarray = np.subtract(cuts["+"], cuts["-"])
outfile.writerow(["NULL","NULL",i.chromosome + ":" + str(i.startbp) + ":" + str(i.endbp)] + newarray.tolist())