/
find_mping_seqs.pl
executable file
·110 lines (97 loc) · 2.98 KB
/
find_mping_seqs.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#!/usr/bin/perl -w
use File::Spec;
use Getopt::Long;
use strict;
my $genomeFasta;
my $fq_1;
my $fq_2;
my $target;
my $len_cutoff;
GetOptions(
'1|fq_1:s' => \$fq_1,
'2|fq_2:s' => \$fq_2,
'h|help' => \&getHelp,
);
if ( !defined $fq_1 or !defined $fq_2 ) {
print "\n\nPlease provide 2 paired fastq files\n";
&getHelp();
}
unless ( -e $fq_1 ) {
print "$fq_1 does not exist. Check file name.\n";
&getHelp();
}
unless ( -e $fq_2 ) {
print "$fq_2 does not exist. Check file name.\n";
&getHelp();
}
sub getHelp () {
print "
usage:
./find_mping_insertionSites.pl [-g chromosome_genome_fasta][-t chromosome_to_search][-1 fastq_file_1] [-2 fastq_file_2][-h]
options:
-1 STR fastq file 1 (.fq or .fastq) [no default]
-2 STR fastq file 2 (.fq or .fastq) [no default]
-h this message
";
exit 1;
}
my $genome_path = File::Spec->rel2abs($genomeFasta);
my $current = File::Spec->curdir();
my $current_dir = File::Spec->rel2abs($current);
`bowtie-build -f $genome_path bowtie_build_index`
if !-e "bowtie_build_index.1.ebwt";
my @fq;
my @fa;
foreach my $fq ( $fq_1, $fq_2 ) {
my $fq_path = File::Spec->rel2abs($fq);
push @fq, $fq_path;
if ( $fq =~ /(\/.+\/)?(\S+)\.(fq|fastq)$/ ) {
my $fa = "$2.fa";
#print "fa: $fa\n";
push @fa, $fa;
if ( !-e $fa ) {
open INFQ, $fq_path or die $1;
open OUTFA, ">$fa" or die $1;
while ( my $header = <INFQ> ) {
my $seq = <INFQ>;
my $qual_header = <INFQ>;
my $qual = <INFQ>;
die "ERROR: expected \'\@\' but saw $header"
if substr( $header, 0, 1 ) ne '@';
print OUTFA ">", substr( $header, 1 );
print OUTFA $seq;
}
close INFQ;
close OUTFA;
}
}
else {
print
"$fq does not seem to be a fastq based on the file extension. It should be fq or fastq\n";
&getHelp();
}
}
#create mping.fa
if ( !-e "mping.fa" ) {
open MPING, ">mping.fa" or die $!;
print MPING
">mping gi|22830894|dbj|AB087615.1| Oryza sativa Japonica Group transposon mPing/miniSNOOPY, complete sequence
GGCCAGTCACAATGGGGGTTTCACTGGTGTGTCATGCACATTTAATAGGGGTAAGACTGAATAAAAAATG
ATTATTTGCATGAAATGGGGATGAGAGAGAAGGAAAGAGTTTCATCCTGGTGAAACTCGTCAGCGTCGTT
TCCAAGTCCTCGGTAACAGAGTGAAACCCCCGTTGAGGCCGATTCGTTTCATTCACCGGATCTCTTGCGT
CCGCCTCCGCCGTGCGACCTCCGCATTCTCCCGCGCCGCGCCGGATTTTGGGTACAAATGATCCCAGCAA
CTTGTATCAATTAAATGCTTTGCTTAGTCTTGGAAACGTCAAAGTGAAACCCCTCCACTGTGGGGATTGT
TTCATAAAAGATTTCATTTGAGAGAAGATGGTATAATATTTTGGGTAGCCGTGCAATGACACTAGCCATT
GTGACTGGCC";
close MPING;
}
#blat fa files against mping.fa
my @flanking_fq;
for ( my $i = 0 ; $i < 2 ; $i++ ) {
my $fa = $fa[$i];
my $fq = $fq[$i];
`blat -minScore=10 -tileSize=7 mping.fa $fa $fa.blatout`
if !-e "$fa.blatout";
my $file_num = $i + 1;
`perl ~/bin/get_mping_fa_for_each_read.pl $fa.blatout $fq`;
}