-
Notifications
You must be signed in to change notification settings - Fork 1
/
bed2wig
executable file
·133 lines (116 loc) · 5.33 KB
/
bed2wig
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
#!/usr/bin/python
from bed import *
import sys
import copy # for shallow copies
from optparse import OptionParser
# === COMMAND LINE INTERFACE, OPTIONS AND HELP ===
parser = OptionParser("usage: %prog [options] args\n - plot the number of bed-features per x-bp window as a wiggle file")
parser.add_option("-w", "--winsize", dest="winsize", help="size of window [default: %default]", type="int", default="200", metavar="SIZE")
parser.add_option("-n", "--name", dest="trackname", help="name of the track [default: FeatPlotWin<Winsize>], or 'notrack'", type="string", default="FeatPlot_Win", metavar="STR")
parser.add_option("-s", "--score", dest="score", help="do not plot number of features in window but plot the sum of the features' scores within sliding window", action="store_true", default=False)
#parser.add_option("-m", "--mindist", dest="distcond", action="append", help="specify minimum distance between all two-tuples of names names, e.g. A-B:50", type="string", metavar="NUMBER")
(options, args) = parser.parse_args()
# ==== FUNCTIONs =====
class FeatureWindow:
""" a windows over features of certain size that keeps track of the of the number of
features of a certain type OR any features (if names==None) """
def __init__(self, features, names, winsize):
self.features = features
self.winFeatures = [] # features currently in the window
self.winSize = winsize
self.counts = {}
self.names = names # these names are tracked
self.winStart = features[0].start
self.winEnd = self.winStart+self.winSize
self.lastPos = features[-1].end
self.seqID=features[0].chrom
self._fillWindow()
def __repr__(self):
d = []
d.append("======WINDOW:========")
d.append("start = "+str(self.winStart))
d.append("end = "+str(self.winEnd))
for k, v in self.counts.iteritems():
d.append("occurences of "+k+" = "+repr(v))
if self.names!=None:
d.append("--NAMES that are tracked:--")
for n in self.names:
d.append(n)
d.append("--FEATURES:--")
for f in self.winFeatures:
d.append(repr(f))
d.append("=====================")
return "\n".join(d)
def shiftRight(self):
""" shift window to the right by one feature and then delete/insert features"""
if self.features==[]:
raise StopIteration
nextF = self.features[0]
diff = nextF.start - self.winEnd + 1
self.winEnd+=diff
self.winStart+=diff
# remove on the left side and ajust counts
for i in reversed(range(0, len(self.winFeatures))):
f = self.winFeatures[i]
if f.start<self.winStart and (self.names==None or f.name in self.names):
self.counts[f.name]-=1
del self.winFeatures[i]
# add on the right side and ajust counts
self._fillWindow()
def _fillWindow(self):
""" takes new features from self.features and put them into the window
until no more features are covered by the window """
while self.features!=[] and self.features[0].start<self.winEnd:
f = self.features.pop(0)
if f.start < self.winStart:
raise RuntimeError, "The bed file has to be sorted!"
if (self.names==None or f.name in self.names ):
self.winFeatures.append(f)
if not f.name in self.counts.keys():
self.counts[f.name]=0
self.counts[f.name]+=1
def countFeatures(self):
return sum(self.counts.values())
# ==== MAIN ====
if len(args)==0:
sys.stderr.write("\nPlease specify a bed-file. Use '-h' for help. \n")
sys.exit(1)
countFts = not options.score
winsize = options.winsize
sys.stderr.write("Parameters: Winsize=%d, UsingScores=%d, files=%s" % (winsize, options.score, args))
for filename in args:
sys.stderr.write("Reading file %s\n" % filename)
if filename=="stdin":
lines = sys.stdin
else:
lines = open(filename, "r").readlines()
fts = parseBedFile(lines)
chrom = fts[0].chrom
# prepare dic: pos -> number of features OR pos -> sum of scores of features
sys.stderr.write("Building dict\n")
ftdic = {}
maxStart = 0
minStart = 99999999999999
for f in fts:
maxStart = max(maxStart, f.start)
minStart = min(minStart, f.start)
if countFts:
ftdic[f.start]=ftdic.get(f.start,0)+1
else:
ftdic[f.start]=ftdic.get(f.start,0)+f.score
#print f.start, f.score
# sum features over range covered by bed
sys.stderr.write("Counting features\n")
trackname = options.trackname + str(winsize)
if not trackname.startswith("notrack"):
print 'track type=wiggle_0 name="%s" description="Plot_Winsize%s_%s" maxHeightPixels=50:50:11 visibility=full autoScale=off windowingFunction=mean color=20,20,150 ' % (trackname, winsize, filename)
print "fixedStep chrom=%s start=%d step=1" % (chrom, minStart+(winsize/2))
num = 0
# init first window
for j in xrange(minStart, minStart+winsize):
num += ftdic.get(j,0)
# shift window by one position and adjust counter
for i in xrange(minStart, maxStart-winsize):
num -= ftdic.get(i,0)
num += ftdic.get(i+winsize,0)
print "\t".join([str(num)])