-
Notifications
You must be signed in to change notification settings - Fork 1
/
bed2fasta-http
executable file
·50 lines (41 loc) · 1.13 KB
/
bed2fasta-http
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
#!/usr/bin/gawk -f
# Start-up rule: read the assembly name from the command line, handle the
# help / missing-argument cases, and arrange for the BED data to be read
# from standard input.
BEGIN {
    # First command-line argument: assembly name, or -h for help.
    db = ARGV[1]

    if (db == "-h") {
        print "bed2fasta-http: get fasta sequence from given assembly (parameter 1) for given regions (bed from stdin) with a dumb and simple wget request to UCSC.";
        print "remember to sleep enough between requests, UCSC is unhappy otherwise";
        # noend is the flag inspected by the END rule, which awk still
        # runs after an exit issued from BEGIN.
        noend = 1
        exit
    }

    if (db == "") {
        print "no database name specified for bed2fasta! -h for help." > "/dev/stderr"
        noend = 1
        exit 1
    }

    # Counter for processed regions.
    c = 0

    # Overwrite the assembly argument with "-" so that gawk reads the
    # records from stdin instead of trying to open a file named after it.
    ARGV[1] = "-"
}
# Ignore BED "track" definition lines and "#" comment lines.
/^(track|#)/ { next }
# Quote a string so that /bin/sh treats it as exactly one literal word:
# wrap it in single quotes and rewrite every embedded ' as '\''.
function shell_quote(s) {
    gsub(/'/, "'\\''", s)
    return "'" s "'"
}

# Main rule: each remaining input line is one BED record
# (chrom, start, end[, name, score, strand]). For every record the external
# helper fastaFromUCSC is invoked with: seq begin end db strand.
{
    # Blank lines carry no region; skip them without calling the helper.
    if (NF == 0)
        next

    # A BED record needs at least chrom, start and end.
    if (NF < 3) {
        print "Skipping malformed BED line " NR ": " $0 > "/dev/stderr"
        next
    }

    seq = $1
    begin = $2
    end = $3

    # The strand lives in column 6. Records with fewer columns (including
    # 4- and 5-column BED) default to the forward strand instead of passing
    # an empty argument to the helper.
    strand = (NF >= 6) ? substr($6, 1, 1) : "+"
    if (strand == "")
        strand = "+"

    print "Downloading " seq ":" begin "-" end "/" strand > "/dev/stderr";

    # Input fields are untrusted text: quote each one before handing the
    # command line to the shell.
    cmd = "fastaFromUCSC " shell_quote(seq) " " shell_quote(begin) " " \
          shell_quote(end) " " shell_quote(db) " " shell_quote(strand)

    # Only count regions whose download command reported success.
    if (system(cmd) == 0)
        c += 1
    else
        print "fastaFromUCSC failed for " seq ":" begin "-" end "/" strand > "/dev/stderr"
}
# Final rule: report how many sequences were fetched. awk runs END even
# after an exit in BEGIN, so the summary is suppressed when noend was set
# by the help / missing-argument paths.
END {
    # Test the flag (the previous "noend=0" was an assignment that always
    # evaluated to false, so the summary was never printed).
    if (!noend) {
        print "Got " c " sequences, check the results! (invalid coords will lead to wrong sequence)" > "/dev/stderr"
    }
}