Permalink
Browse files

Fixed lots of errors and bugs in the stats and output files. Excel output is not working yet.
  • Loading branch information...
1 parent 77b987c commit a5ce15a3b6a1cff15384c93430aa90e215152349 mestato committed Jul 7, 2015
Showing with 78 additions and 74 deletions.
  1. +78 −74 hwg_gssr_scripts/findSSRs_post_assembly.pl
@@ -155,13 +155,13 @@
my $SSR_COUNT_COMPOUND = 0;
my $SSR_COUNT_PRIMER = 0;
-my %MOTIFLEN = ('2' => 0,
- '3' => 0,
- '4' => 0);
+my %MOTIFLEN = (2 => 0,
+ 3 => 0,
+ 4 => 0);
-my %MOTIFLEN_w_PRIMERS = ('2' => 0,
- '3' => 0,
- '4' => 0);
+my %MOTIFLEN_w_PRIMERS = (2 => 0,
+ 3 => 0,
+ 4 => 0);
my %MOTIFS = ('|AT|TA|' => 0,
'|AG|GA|CT|TC|' => 0,
'|AC|CA|TG|GT|' => 0,
@@ -171,7 +171,7 @@
'|AAG|AGA|GAA|CTT|TTC|TCT|' => 0,
'|AAC|ACA|CAA|GTT|TTG|TGT|' => 0,
- '|CCA|CAC|CCA|TGG|GTG|TGG|' => 0,
+ '|CCA|CAC|ACC|TGG|GTG|GGT|' => 0,
'|GGC|GCG|CGG|GCC|CCG|CGC|' => 0,
'|AGG|GAG|GGA|CCT|CTC|TCC|' => 0,
@@ -236,6 +236,7 @@ sub main{
addToPrimer3InputFile ($p3_input);
print "$PRIMER3 < $p3_input > $p3_output\n";
my $status = system("$PRIMER3 < $p3_input > $p3_output");
+ print "$status\n";
parseP3_output($p3_output);
print "done.\n";
@@ -248,24 +249,17 @@ sub main{
##---------------------------------------------------------------
## Producing output - statistics
- calculate_stats($stats_out);
+ calculate_stats();
+ print_stats($stats_out);
##---------------------------------------------------------------
## Producing output - Excel
+ #create_excel_file($ssr_xlsx);
-# print "creating Excel workbook...";
-# my ($workbook,$formats) = createExcelWorkbook($ssr_xlsx);
-# print "done.\n";
-#
-# print "generate output...";
-# # generate filehandles
-# my ($di_worksheet, $tri_worksheet, $tetra_worksheet) = initiate_workbooks($workbook, $formats, $project);
-# print "done.\n";
-
- #print "stats...\n";
+ #my ($workbook,$formats) = createExcelWorkbook($ssr_xlsx);
+ #initiate_workbooks($workbook, $formats, $project);
#my $worksheet_stats = printStats($stats_out, $workbook, $formats, $project);
- #
#$worksheet_stats->activate();
#$worksheet_stats->select();
#$workbook->close();
@@ -702,7 +696,7 @@ sub create_flat_files{
$SSR_STATS{$ssr_id}{MOTIF},
$SSR_STATS{$ssr_id}{NO_REPEATS},
$SSR_STATS{$ssr_id}{START},
- $SSR_STATS{$ssr_id}{END},
+ $SSR_STATS{$ssr_id}{END},
);
print OUTS "\n";
@@ -711,13 +705,13 @@ sub create_flat_files{
if($SSR_STATS{$ssr_id}{COMPOUND} == 0 &&
$SSR_STATS{$ssr_id}{FORWARD} =~ /\S/
){
- if(length $SSR_STATS{$ssr_id}{MOTIF_LEN} == 2){
+ if($SSR_STATS{$ssr_id}{MOTIF_LENGTH} == 2){
_print_primer_flat_file_line($di_fh, $ssr_id);
}
- elsif(length $SSR_STATS{$ssr_id}{MOTIF_LEN} == 3){
+ elsif($SSR_STATS{$ssr_id}{MOTIF_LENGTH} == 3){
_print_primer_flat_file_line($tri_fh, $ssr_id);
}
- elsif(length $SSR_STATS{$ssr_id}{MOTIF_LEN} == 4){
+ elsif($SSR_STATS{$ssr_id}{MOTIF_LENGTH} == 4){
_print_primer_flat_file_line($tetra_fh, $ssr_id);
}
}
@@ -787,38 +781,45 @@ sub calculate_stats{
$SSR_COUNT_COMPOUND++;
}
else{
- $MOTIFLEN{ $SSR_STATS{$ssr_id}{MOTIFLEN} }++;
+ my $motif_len = $SSR_STATS{$ssr_id}{MOTIF_LENGTH} ;
+ #print "motif length is $motif_len\n";
+ $MOTIFLEN{$motif_len}++;
my $motifUC = uc($SSR_STATS{$ssr_id}{MOTIF});
foreach my $group (keys %MOTIFS) {
if($group =~ /\|$motifUC\|/){
- print "Incrementing $group for $motifUC\n";
+ #print "Incrementing $group for $motifUC\n";
$MOTIFS{$group}++;
}
}
if($SSR_STATS{$ssr_id}{FORWARD} =~ /\S/){
$SSR_COUNT_PRIMER++;
- $MOTIFLEN_w_PRIMERS{ $SSR_STATS{$ssr_id}{MOTIFLEN} }++;
+ $MOTIFLEN_w_PRIMERS{$motif_len}++;
}
}
}
}
-sub printStats{
+sub print_stats{
my $stats_out = $_[0]; # file name
open (OUTS, ">".$stats_out) || die "ERROR cannot open $stats_out\n";
print OUTS 'SSR Summary Report\n';
print OUTS "Analsis of $SEQ_COUNT sequences\n";
print OUTS "$TIME\n";
+ print OUTS "\n";
print OUTS "Number of sequences with at least one SSR\t$SEQ_w_SSRS\n";
- print OUTS "Number of SSRs identified\t$SSR_COUNT\n\n";
- print OUTS "Number of compound SSRs: $SSR_COUNT_COMPOUND\n";
- print OUTS "Number of SSRs with primers*: $SSR_COUNT_COMPOUND\n";
- print OUTS "*No primers are designed for compound SSRs\n";
+ print OUTS "Number of SSRs identified\t$SSR_COUNT\n";
+ print OUTS "\n";
+ print OUTS "Number of compound SSRs*: $SSR_COUNT_COMPOUND\n";
+ print OUTS "Number of SSRs with primers**: $SSR_COUNT_PRIMER\n";
+ print OUTS "\n";
+ print OUTS "*Compound SSRs are defined as any SSRs next to each or separated by less than 15 bases\n";
+ print OUTS "**No primers are designed for compound SSRs\n";
print OUTS "\n";
+ print OUTS "Parameters used for identifying SSRS:\n";
print OUTS "Base Pairs in Motif\tMin # Reps\tMax # Reps\n";
print OUTS "--------------------------------------\n";
print OUTS "2 (Dinucleotides)\t$MIN_REPS_2bp\t$MAX_REPS_2bp\n";
@@ -834,15 +835,16 @@ sub printStats{
print OUTS "$group\t$MOTIFS{$group}\n";
}
print OUTS "\n";
- print OUTS "Motif Pattern Length\tNumber of SSRs Found\n";
+ print OUTS "Motif Pattern Length\tNumber of SSRs\n";
print OUTS "--------------------------------------\n";
foreach $group (sort keys %MOTIFLEN){
print OUTS "$group\t$MOTIFLEN{$group}\n";
}
- print OUTS "SSRS with PRIMERS\n";
- print OUTS "Motif Pattern Length\tNumber of SSRs Found\n";
+ print OUTS "\n";
+ print OUTS "SSRS with Primers \n";
+ print OUTS "Motif Pattern Length\tNumber of SSRs\n";
print OUTS "--------------------------------------\n";
foreach $group (sort keys %MOTIFLEN_w_PRIMERS){
@@ -864,7 +866,9 @@ sub printStats{
# my $tri_worksheet = _initiate_worksheet($workbook, $formats, $project, "Trinucleotide");
# my $tetra_worksheet = _initiate_worksheet($workbook, $formats, $project, "Tetranucleotide");
#
-# return($di_worksheet, $tri_worksheet, $tetra_worksheet);
+# _print_worksheet($di_worksheet, $formats, $project);
+# _print_worksheet($tri_worksheet, $formats, $project);
+# _print_worksheet($tetra_worksheet, $formats, $project);
#}
################################################################
#sub _initiate_worksheet{
@@ -1175,45 +1179,45 @@ sub printStats{
#}
#
################################################################
-#
-#sub createExcelWorkbook{
-#
-# my $ssr_xlsx = $_[0];
-#
-# my $workbook; # the excel workbook
-# my %formats;
-# my %header;
-# my %text;
-# my %bigheader;
-# my %highlight;
-#
-#
-# # Create an excel workbook
-# $workbook = Excel::Writer::XLSX->new("$ssr_xlsx");
-#
-# # Setup the four formats that will be necessary for the excel spreadsheet
-# %header = (font => 'Calibri',
-# size => 12,
-# bold => 1,
-# color => 'black',
-# align => 'left',
-# text_wrap => 1);
-#
-# %text = (font => 'Calibri',
-# size => 12,
-# color => 'black',
-# align => 'left',
-# text_wrap => 1);
-#
-# #add the formats to the workbook
-# $formats{header} = $workbook->add_format(%header);
-# $formats{text} = $workbook->add_format(%text);
-#
-# return ($workbook,\%formats);
-#
-#}
-#
-#
+
+# createExcelWorkbook($ssr_xlsx)
+#
+# Creates a new Excel workbook and registers the two cell formats
+# ('header' and 'text') on it.
+#
+# Parameters:
+#   $ssr_xlsx - file name of the .xlsx workbook to create
+#
+# Returns:
+#   ($workbook, \%formats) - the workbook object and a reference to a hash
+#   holding the format objects under the keys 'header' (bold) and 'text';
+#   both are 12pt Calibri, black, left-aligned with text wrapping.
+#
+# Dies if the workbook cannot be created.
+sub createExcelWorkbook{
+
+    my $ssr_xlsx = $_[0];
+
+    # Excel::Writer::XLSX->new returns undef when the file cannot be
+    # created (permissions, bad path, ...). Fail here with a clear message
+    # instead of on the first method call on an undefined value.
+    my $workbook = Excel::Writer::XLSX->new("$ssr_xlsx");
+    die "ERROR cannot create Excel workbook $ssr_xlsx: $!\n"
+        unless defined $workbook;
+
+    # Set up the formats that will be necessary for the Excel spreadsheet.
+    # The header format is the text format plus bold.
+    my %header = (font => 'Calibri',
+                  size => 12,
+                  bold => 1,
+                  color => 'black',
+                  align => 'left',
+                  text_wrap => 1);
+
+    my %text = (font => 'Calibri',
+                size => 12,
+                color => 'black',
+                align => 'left',
+                text_wrap => 1);
+
+    # Add the formats to the workbook.
+    my %formats;
+    $formats{header} = $workbook->add_format(%header);
+    $formats{text} = $workbook->add_format(%text);
+
+    return ($workbook,\%formats);
+
+}
+
+
################################################################
#sub _printUsage {
# print "Usage: $0.pl <arguments>";

0 comments on commit a5ce15a

Please sign in to comment.