Permalink
Browse files

Everything is working, including the Excel data sheets.

  • Loading branch information...
1 parent 49bb611 commit fa6abafbbe375eab3f6914b1d8f8d0c9f1cd7783 mestato committed Jul 7, 2015
Showing with 112 additions and 186 deletions.
  1. +112 −186 hwg_gssr_scripts/findSSRs_post_assembly.pl
@@ -879,11 +879,13 @@ sub create_excel_file{
my $header_format = $workbook->add_format(%header);
my $text_format = $workbook->add_format(%text);
- $workbook = create_stats_worksheet($workbook, $header_format, $text_format, $project);
+ my $worksheet_stats = create_stats_worksheet($workbook, $header_format, $text_format, $project);
- #my $di_worksheet = _initiate_worksheet($workbook, $formats, $project, "Dinucleotide");
- #my $tri_worksheet = _initiate_worksheet($workbook, $formats, $project, "Trinucleotide");
- #my $tetra_worksheet = _initiate_worksheet($workbook, $formats, $project, "Tetranucleotide");
+ build_data_worksheets($workbook, $header_format, $text_format);
+
+ $worksheet_stats->activate();
+ $worksheet_stats->select();
+ $workbook->close();
}
@@ -988,191 +990,115 @@ sub create_stats_worksheet{
$worksheet->write("B$i", $MOTIFLEN_w_PRIMERS{$group});
}
+ return $worksheet;
}
+##############################################################
+# Fill the Dinucleotides / Trinucleotides / Tetranucleotides worksheets
+# with one row per qualifying SSR recorded in %SSR_STATS.
+#
+# Arguments (positional):
+#   $workbook      - workbook object, forwarded to _initiate_worksheet()
+#   $header_format - header cell format, forwarded to _initiate_worksheet()
+#   $text_format   - data column format, forwarded to _initiate_worksheet()
+#
+# Only non-compound SSRs (COMPOUND == 0) whose FORWARD primer contains a
+# non-whitespace character are written, each to the sheet matching its
+# MOTIF_LENGTH (2, 3 or 4). Data rows start at 3 because
+# _initiate_worksheet() puts the title in row 1 and the column labels in
+# row 2. Returns nothing.
+sub build_data_worksheets{
+    my ($workbook, $header_format, $text_format) = @_;
-#sub _initiate_worksheet{
-# my $workbook = $_[0];
-# my $formats = $_[1];
-# my $project = $_[2];
-# my $name = $_[3];
-#
-# my $worksheet = $workbook->add_worksheet($name);
-# $worksheet->set_column('A:A', 60, $formats->{text});
-# $worksheet->set_column('F:G', 30, $formats->{text});
-# #$worksheet->set_column('J:J', 100, $formats->{text});
-# $worksheet->write('A1', "$name Repeats for $project", $formats->{header});
-# $worksheet->write('A2', 'Sequence Name', $formats->{header});
-# $worksheet->write('B2', 'Motif', $formats->{header});
-# $worksheet->write('C2', '# Repeats', $formats->{header});
-# $worksheet->write('D2', 'Start', $formats->{header});
-# $worksheet->write('E2', 'End', $formats->{header});
-# $worksheet->write('F2', 'Forward Primer', $formats->{header});
-# $worksheet->write('G2', 'Reverse Primer', $formats->{header});
-# $worksheet->write('H2', 'Forward Tm', $formats->{header});
-# $worksheet->write('I2', 'Reverse Tm', $formats->{header});
-# $worksheet->write('J2', 'Fragment Size', $formats->{header});
-# #$worksheet->write('J2', 'Sequence', $formats->{header});
-#
-# return $worksheet;
-#}
-
-
- #my $worksheet_stats = printStats($stats_out, $workbook, $formats, $project);
- #$worksheet_stats->activate();
- #$worksheet_stats->select();
- #$workbook->close();
-#sub initiate_workbooks{
-# my $workbook = $_[0]; # file name
-# my $formats = $_[1]; # file name
-# my $project = $_[2]; # file name
-#
-# _print_worksheet($di_worksheet, $formats, $project);
-# _print_worksheet($tri_worksheet, $formats, $project);
-# _print_worksheet($tetra_worksheet, $formats, $project);
-#}
-################################################################
+
+    # Created in this order so the sheets appear as Di, Tri, Tetra.
+    my $di_worksheet    = _initiate_worksheet($workbook, $header_format, $text_format, "Dinucleotides");
+    my $tri_worksheet   = _initiate_worksheet($workbook, $header_format, $text_format, "Trinucleotides");
+    my $tetra_worksheet = _initiate_worksheet($workbook, $header_format, $text_format, "Tetranucleotides");
+
+    # Motif length => destination sheet and the next free row on it.
+    my %target_for = (
+        2 => { worksheet => $di_worksheet,    next_row => 3 },
+        3 => { worksheet => $tri_worksheet,   next_row => 3 },
+        4 => { worksheet => $tetra_worksheet, next_row => 3 },
+    );
+
+    # Sorted so the rows come out in the same order on every run; the
+    # order returned by a bare keys() is not stable between runs.
+    foreach my $ssr_id (sort keys %SSR_STATS){
+        my $stats = $SSR_STATS{$ssr_id};
+
+        # for excel data files, only print SSRs
+        # that have primers
+        next unless $stats->{COMPOUND} == 0;
+        next unless defined $stats->{FORWARD} && $stats->{FORWARD} =~ /\S/;
+
+        # Numify the length so the lookup accepts the same values a
+        # numeric "==" comparison would; other lengths have no sheet.
+        my $target = $target_for{ $stats->{MOTIF_LENGTH} + 0 } or next;
+
+        _print_excel_file_line($target->{worksheet}, $target->{next_row}, $ssr_id);
+        $target->{next_row}++;
+    }
+
+    return;
+}
+
+##############################################################
+# Add a worksheet named $name to $workbook and lay out its header area.
+#
+# Arguments (positional): $workbook, $header_format, $text_format, $name.
+# Columns A through J get fixed widths with $text_format; row 1 gets the
+# sheet title and row 2 the column labels, both in $header_format.
+# Returns the new worksheet object.
+sub _initiate_worksheet{
+    my ($workbook, $header_format, $text_format, $name) = @_;
+
+    my $worksheet = $workbook->add_worksheet($name);
+
+    # Column range => width, applied left to right.
+    my @column_widths = (
+        [ 'A:A', 60 ],
+        [ 'B:E', 10 ],
+        [ 'F:G', 30 ],
+        [ 'H:J', 10 ],
+    );
+    foreach my $spec (@column_widths){
+        my ($range, $width) = @{$spec};
+        $worksheet->set_column($range, $width, $text_format);
+    }
+
+    $worksheet->write('A1', "$name with primers", $header_format);
+
+    # Row 2 labels, one per column starting at A.
+    my @labels = (
+        'SSR ID',
+        'Motif',
+        '# Repeats',
+        'Start',
+        'End',
+        'Forward Primer',
+        'Reverse Primer',
+        'Forward Tm',
+        'Reverse Tm',
+        'Fragment Size',
+    );
+    my $column = 'A';
+    foreach my $label (@labels){
+        $worksheet->write($column . '2', $label, $header_format);
+        $column++;    # string increment: A, B, ... J
+    }
+
+    return $worksheet;
+}
-#sub _print_worksheet{
-# my $worksheet = $_[0];
-# my $formats = $_[1];
-# my $name = $_[2];
-#
-#
-#}
- #foreach my $group (keys %MOTIFS) {
- # my $motifUC = uc($motif);
- # if($group =~ /\|$motifUC\|/){
- # # If this group contains this motif
- # #print "Incrementing $group for $motif\n";
- # $MOTIFS{$group}++;
- # }
- #}
-##
-## $di_worksheet->write("A$di_index", $contig, $formats->{text});
-## $di_worksheet->write("B$di_index", $motif, $formats->{text});
-## $di_worksheet->write("C$di_index", $cnt, $formats->{text});
-## $di_worksheet->write("D$di_index", $ssrStart, $formats->{text});
-## $di_worksheet->write("E$di_index", $ssrEnd, $formats->{text});
-## $di_worksheet->write("F$di_index", $forward, $formats->{text});
-## $di_worksheet->write("G$di_index", $reverse, $formats->{text});
-## $di_worksheet->write("H$di_index", $left_tm, $formats->{text});
-## $di_worksheet->write("I$di_index", $right_tm, $formats->{text});
-## $di_worksheet->write("J$di_index", $product_size, $formats->{text});
-## #$di_worksheet->write("J$di_index", $seq, $formats->{text});
-## $di_index++;
-##
-## # Increment motif count
-## foreach my $group (keys %MOTIFS) {
-## my $motifUC = uc($motif);
-## if($group =~ /\|$motifUC\|/){
-## # If this group contains this motif
-## my $tmp = $MOTIFS{$group}++;
-## $tmp++;
-## $MOTIFS{$group} = $tmp;
-## }
-## }# end foreach $group
-## }
-## elsif(length $motif == 3){
-## print $tri_fh join("\t", $contig, $motif, $ssrStart, $ssrEnd, $forward, $reverse, $left_tm, $right_tm, $product_size, $seq, $seq_masked);
-## print $tri_fh "\n";
-## my $tmp = $MOTIFLEN_w_PRIMERS{3};
-## $tmp++;
-## $MOTIFLEN_w_PRIMERS{3} = $tmp;
-##
-## my $cnt = ($ssrEnd-$ssrStart+1)/3;
-## $tri_worksheet->write("A$tri_index", $contig, $formats->{text});
-## $tri_worksheet->write("B$tri_index", $motif, $formats->{text});
-## $tri_worksheet->write("C$tri_index", $cnt, $formats->{text});
-## $tri_worksheet->write("D$tri_index", $ssrStart, $formats->{text});
-## $tri_worksheet->write("E$tri_index", $ssrEnd, $formats->{text});
-## $tri_worksheet->write("F$tri_index", $forward, $formats->{text});
-## $tri_worksheet->write("G$tri_index", $reverse, $formats->{text});
-## $tri_worksheet->write("H$tri_index", $left_tm, $formats->{text});
-## $tri_worksheet->write("I$tri_index", $right_tm, $formats->{text});
-## $tri_worksheet->write("J$tri_index", $product_size, $formats->{text});
-## #$tri_worksheet->write("J$tri_index", $seq, $formats->{text});
-## $tri_index++;
-## }
-## elsif(length $motif == 4){
-## _printLineToWorksheet();
-## }
-## }
-## else{
-## print $fastamulti_fh ">$contig\n$seq\n";
-## }
-## }
-## }
-## }
-## } # end while <INPUT>
-##
-## close P3O;
-##
-## return;
-##}
-##
-##################################################################
-##sub _printLineToWorksheet{
-## my $fh = shift;
-## my $index = shift;
-## my $worksheet = shift;
-##
-## my $contig = shift;
-## my $motif = shift;
-## my $ssrStart = shift;
-## my $ssrEnd = shift;
-## my $forward = shift;
-## my $reverse = shift;
-## my $left_tm = shift;
-## my $right_tm = shift;
-## my $product_size = shift;
-## my $seq = shift;
-## my $seq_masked = shift;
-##
-## print $fh join("\t", $contig, $motif, $ssrStart, $ssrEnd, $forward, $reverse, $left_tm, $right_tm, $product_size, $seq, $seq_masked);
-## print $fh "\n";
-## my $tmp = $MOTIFLEN_w_PRIMERS{4};
-## $tmp++;
-## $MOTIFLEN_w_PRIMERS{4} = $tmp;
-##
-## my $cnt = ($ssrEnd-$ssrStart+1)/4;
-## $worksheet->write("A$index", $contig, $formats->{text});
-## $worksheet->write("B$index", $motif, $formats->{text});
-## $worksheet->write("C$index", $cnt, $formats->{text});
-## $worksheet->write("D$index", $ssrStart, $formats->{text});
-## $worksheet->write("E$index", $ssrEnd, $formats->{text});
-## $worksheet->write("F$index", $forward, $formats->{text});
-## $worksheet->write("G$index", $reverse, $formats->{text});
-## $worksheet->write("H$index", $left_tm, $formats->{text});
-## $worksheet->write("I$index", $right_tm, $formats->{text});
-## $worksheet->write("J$index", $product_size, $formats->{text});
-## $index++;
-##
-##}
-##
-#
-#################################################################
################################################################
-#sub _printUsage {
-# print "Usage: $0.pl <arguments>";
-# print qq(
-# The list of arguments includes:
-#
-# -f|--fasta_file <fasta_file>
-# Required. The file of the sequences to be searched.
-#
-# -m|--masked_file <masked_fasta_file>
-# Required. A soft-masked version of the fasta file (soft masked means low
-# complexity sequences are in lower case bases.)
-#
-# -p|--project "project name"
-# Optional. A project name for use in the Excel output.
-#
-# );
-# print "\n";
-# return;
-#}
-#
-#
-#1;
+# Write the %SSR_STATS record for $ssr_id into row $index of $worksheet.
+#
+# Column A receives the SSR id itself; columns B through J receive the
+# record's fields in the order listed below. No cell format is passed to
+# write(). Returns the result of the last write() call (column J).
+sub _print_excel_file_line{
+    my ($worksheet, $index, $ssr_id) = @_;
+
+    $worksheet->write("A$index", $ssr_id);
+
+    # Fields for columns B..J, left to right.
+    my @fields = qw(
+        MOTIF NO_REPEATS START END
+        FORWARD REVERSE LEFT_TM RIGHT_TM PRODUCT_SIZE
+    );
+
+    my $column = 'B';
+    my $status;
+    foreach my $field (@fields){
+        $status = $worksheet->write("$column$index", $SSR_STATS{$ssr_id}{$field});
+        $column++;    # string increment: B, C, ... J
+    }
+
+    return $status;
+}
+
+###############################################################
+# Print the command-line usage summary to the currently selected output
+# handle. Takes no arguments and returns nothing.
+sub _printUsage {
+    # $0 is the script name as it was invoked, so it is printed as-is.
+    # Appending ".pl" to it doubled the extension (e.g. "script.pl.pl").
+    print "Usage: $0 <arguments>";
+    print qq(
+ The list of arguments includes:
+
+ -f|--fasta_file <fasta_file>
+ Required. The file of the sequences to be searched.
+
+ -m|--masked_file <masked_fasta_file>
+ Required. A soft-masked version of the fasta file (soft masked means low
+ complexity sequences are in lower case bases.)
+
+ -p|--project "project name"
+ Optional. A project name for use in the Excel output.
+
+ );
+    print "\n";
+    return;
+}
+
+
+1;

0 comments on commit fa6abaf

Please sign in to comment.