Permalink
Browse files

Fasta file output done.

  • Loading branch information...
1 parent 37489ba commit 1671f137675d4eee2a0ee6298e673093ea011b36 mestato committed Jul 7, 2015
Showing with 40 additions and 52 deletions.
  1. +40 −52 hwg_gssr_scripts/findSSRs_post_assembly.pl
@@ -34,7 +34,7 @@
# Output:
# ------
# <input-file-name>.ssr.fasta
-# A fasta file with sequences with a SSR. (Compound SSRs are not considered)
+# A fasta file of the sequences that contain an SSR. (Sequences with compound SSRs are included)
#
# <input-file-name>.ssr_stats.txt
# A text file of statistics about the SSRs discovered.
@@ -221,13 +221,14 @@ sub main{
$p3_output = "$fasta_file.p3out.txt";
$ssr_out = "$fasta_file.ssr_report.txt";
- $ssr_xlsx = "$fasta_file.ssr_report.xlsx";
$fasta_out = "$fasta_file.ssr.fasta";
$stats_out = "$fasta_file.ssr_stats.txt";
$di_primer_out = "$fasta_file.di_primer_report.txt";
$tri_primer_out = "$fasta_file.tri_primer_report.txt";
$tetra_primer_out = "$fasta_file.tetra_primer_report.txt";
+ $ssr_xlsx = "$fasta_file.ssr_report.xlsx";
+
##---------------------------------------------------------------
print "finding SSRs...\n";
process_file($fasta_file, $masked_file);
@@ -245,11 +246,9 @@ sub main{
##---------------------------------------------------------------
## Producing output - Fasta files and flat files
- #($fasta_out);
-
- #print "printing output files...";
- #create_primer_flat_files ($di_primer_out, $tri_primer_out, $tetra_primer_out);
-
+ print "printing output files...";
+ create_flat_files($ssr_out, $di_primer_out, $tri_primer_out, $tetra_primer_out);
+ create_fasta_file($fasta_out);
##---------------------------------------------------------------
## Producing output - statistics
@@ -523,13 +522,13 @@ sub flag_multiSSRs{
## this contig has only one ssr
my $start_index = $starts[0];
my $ssr_id = $contig."_ssr".$start_index;
- $SSR_STATS{$ssr_id}{MULTI} = "False";
+ $SSR_STATS{$ssr_id}{MULTI} = "FALSE";
}
else{
## this contig has multiple ssrs
foreach my $start_index (@starts){
my $ssr_id = $contig."_ssr".$start_index;
- $SSR_STATS{$ssr_id}{MULTI} = "True";
+ $SSR_STATS{$ssr_id}{MULTI} = "TRUE";
}
}
}
@@ -652,49 +651,38 @@ sub parseP3_output{
print "total identical primers: $identical_primer_cnt\n";
}
-################################################################
-#sub printFasta{
-# my $fasta_out = shift;
-#
-# # this subroutine accomplishes two things
-# # 1. adds a MULTI flag to the data hash indicating if the
-# # ssr is the only one in the sequence or one of many
-# # 2. prints a fasta file with sequences with a single ssr
-# # and another with sequences with multiple ssrs
-#
-# open FASTA, ">$fasta_out";
-#
-# foreach my $contig (keys %CONTIG_SSR_STARTS){
-# my @starts = @{ $CONTIG_SSR_STARTS{$contig}};
-# if(@starts == 1){
-# ## this contig has only one ssr
-# my $start_index = $starts[0];
-# my $ssr_id = $contig."_ssr".$start_index;
-# $SSR_STATS{$ssr_id}{MULTI} = "False";
-# #print "\t$ssr_id:FALSE\n";
-# print FASTA ">$contig ".
-# "($SSR_STATS{$ssr_id}{START}-$SSR_STATS{$ssr_id}{END})\n".
-# "$SSR_STATS{$ssr_id}{SEQ}\n";
-# }
-# else{
-# ## this contig has multiple ssrs
-# print FASTA ">$contig (";
-# foreach my $start_index (@starts){
-# my $ssr_id = $contig."_ssr".$start_index;
-# $SSR_STATS{$ssr_id}{MULTI} = "True";
-# #print "\t$ssr_id:TRUE\n";
-# print FASTA "$SSR_STATS{$ssr_id}{START}-$SSR_STATS{$ssr_id}{END} ";
-# }
-# #get the first ssr index just so we can get the sequence
-# my $start_index = $starts[0];
-# my $ssr_id = $contig."_ssr".$start_index;
-# print FASTA ")\n ";
-# print FASTA "$SSR_STATS{$ssr_id}{SEQ}\n";
-# }
-# }
-# close FASTA;
-#
-#}
+# create_flat_files($ssr_out, $di_primer_out, $tri_primer_out, $tetra_primer_out)
+#
+# Placeholder for the flat-file reports: the four output file names are
+# taken off the argument list, but nothing is written yet.
+sub create_flat_files{
+    my $ssr_out          = shift @_;
+    my $di_primer_out    = shift @_;
+    my $tri_primer_out   = shift @_;
+    my $tetra_primer_out = shift @_;
+}
+
+# create_fasta_file($fasta_out)
+#
+# Writes one FASTA record per contig found in %CONTIG_SSR_STARTS to the file
+# named by $fasta_out.
+#
+#   header   : ">contig (start-end start-end ...)" listing every SSR recorded
+#              for the contig; an SSR whose COMPOUND flag is the string 'TRUE'
+#              is followed by "*Compound".
+#   sequence : the SEQ stored under the contig's first SSR id in %SSR_STATS
+#              (presumably every SSR of a contig carries the same SEQ --
+#              confirm against the code that fills %SSR_STATS).
+#
+# Dies if the output file cannot be opened or closed.
+sub create_fasta_file{
+    my $fasta_out = shift;
+
+    # Three-argument open with a lexical handle: the file name can never be
+    # mistaken for an open mode, and a failure is reported instead of ignored.
+    open(my $fasta_fh, '>', $fasta_out)
+        or die "Cannot open $fasta_out for writing: $!\n";
+
+    foreach my $contig (keys %CONTIG_SSR_STARTS){
+        my @starts = @{ $CONTIG_SSR_STARTS{$contig} };
+
+        # A contig without any recorded SSR has nothing to report; skipping it
+        # also avoids autovivifying a bogus "<contig>_ssr" entry in %SSR_STATS.
+        next unless @starts;
+
+        print $fasta_fh ">$contig (";
+        foreach my $start_index (@starts){
+            my $ssr_id = $contig."_ssr".$start_index;
+            print $fasta_fh "$SSR_STATS{$ssr_id}{START}-$SSR_STATS{$ssr_id}{END} ";
+
+            # The flag is a string, so it must be compared with eq. With ==
+            # both sides are numified to 0 and every SSR (even one flagged
+            # 'FALSE') was reported as compound.
+            my $compound = $SSR_STATS{$ssr_id}{COMPOUND};
+            if(defined $compound && $compound eq 'TRUE'){
+                print $fasta_fh "*Compound ";
+            }
+        }
+
+        # get the first ssr id just so we can get the sequence
+        my $first_ssr_id = $contig."_ssr".$starts[0];
+
+        # End the header with a bare newline so the sequence line does not
+        # start with a stray space.
+        print $fasta_fh ")\n";
+        print $fasta_fh "$SSR_STATS{$first_ssr_id}{SEQ}\n";
+    }
+
+    # Buffered write errors only surface at close, so check it as well.
+    close($fasta_fh)
+        or die "Cannot close $fasta_out: $!\n";
+}
################################################################

0 comments on commit 1671f13

Please sign in to comment.