-
Notifications
You must be signed in to change notification settings - Fork 1
/
bedRemoveOverlap
executable file
·65 lines (51 loc) · 1.92 KB
/
bedRemoveOverlap
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#!/usr/bin/python
import bed
import sys
from optparse import OptionParser
# === COMMAND LINE INTERFACE, OPTIONS AND HELP ===
# The usage text doubles as the tool description shown by -h/--help.
parser = OptionParser(
    "usage: %prog [options] file \n"
    "Reads bed from file (stdin ok)and joins all features that are "
    "overlapped by immediately preceding features. "
    "(features have to bed sorted, use ucsc's bedSort)"
)
# Disabled options, kept for reference (not implemented below):
#parser.add_option("-v", "--inverse", dest="inverse", action="store_true", help="inverse restult, only print lines that DO overlap",default=False)
#parser.add_option("-c", "--concat", dest="concat", action="store_true", help="print all lines but if 2 feats overlap: concat the name of the second feature to the name the first feature",default=False)
parser.add_option(
    "-s",
    "--wobblestart",
    action="store",
    type="int",
    dest="tolerance",
    default=0,
    help=(
        "tolerate a shift of x basepairs when looking for overlaps, "
        "if set to 10 then even features that not completely overlap "
        "will be removed"
    ),
)
# Positional arguments end up in `args`; option values in `options`.
options, args = parser.parse_args()
tolerance = options.tolerance
# ==== FUNCTIONs =====
def flushClique(clique, tolerance):
    """Print one output line for a run of overlapping features.

    An empty clique prints nothing. A single feature is printed unchanged
    (using its string form). Two or more features are collapsed into one
    tab-separated line on the first feature's chromosome, spanning the
    smallest start to the largest end found in the clique.

    clique    -- list of feature objects exposing chrom, start and end
    tolerance -- accepted for interface compatibility; not used here

    The caller's list is not modified; resetting it is the caller's job.
    """
    if not clique:
        return
    if len(clique) == 1:
        # Nothing to merge: emit the feature exactly as it was read.
        print(clique[0])
        return

    first = clique[0]
    # Span the whole clique rather than just its first member: a later
    # feature may end beyond the first one.
    start = min(b.start for b in clique)
    end = max(b.end for b in clique)
    fields = [first.chrom, str(start), str(end)]

    # NOTE(review): assumes features carry their BED name in `.name` and that
    # a comma-joined list is an acceptable merged name -- confirm against the
    # bed module. Without any names a plain 3-column line is written.
    names = [str(b.name) for b in clique if getattr(b, "name", None)]
    if names:
        fields.append(",".join(names))

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("\t".join(fields))
# ==== MAIN ====
# Walk the (pre-sorted) features and group every run in which each feature
# overlaps its immediate predecessor; each run is flushed as one output line.
if not args:
    # Report a missing input through the parser instead of an IndexError.
    parser.error("no input bed file given")

beds = bed.parseBedFilename(args[0])
clique = []
for b in beds:
    # A feature that does not overlap the previous one closes the current
    # run. Every feature is appended exactly once, so no feature is dropped
    # or duplicated, and empty input never produces a placeholder entry.
    if clique and not bed.overlap(b, clique[-1]):
        flushClique(clique, tolerance)
        clique = []
    clique.append(b)
# Emit whatever run is still open at end of input (no-op when empty).
flushClique(clique, tolerance)