diff --git a/hwg_gssr_scripts/findSSRs_post_assembly.pl b/hwg_gssr_scripts/findSSRs_post_assembly.pl index 66a47f7..0a79492 100755 --- a/hwg_gssr_scripts/findSSRs_post_assembly.pl +++ b/hwg_gssr_scripts/findSSRs_post_assembly.pl @@ -879,11 +879,13 @@ sub create_excel_file{ my $header_format = $workbook->add_format(%header); my $text_format = $workbook->add_format(%text); - $workbook = create_stats_worksheet($workbook, $header_format, $text_format, $project); + my $worksheet_stats = create_stats_worksheet($workbook, $header_format, $text_format, $project); - #my $di_worksheet = _initiate_worksheet($workbook, $formats, $project, "Dinucleotide"); - #my $tri_worksheet = _initiate_worksheet($workbook, $formats, $project, "Trinucleotide"); - #my $tetra_worksheet = _initiate_worksheet($workbook, $formats, $project, "Tetranucleotide"); + build_data_worksheets($workbook, $header_format, $text_format); + + $worksheet_stats->activate(); + $worksheet_stats->select(); + $workbook->close(); } @@ -988,191 +990,115 @@ sub create_stats_worksheet{ $worksheet->write("B$i", $MOTIFLEN_w_PRIMERS{$group}); } + return $worksheet; } +############################################################## +sub build_data_worksheets{ + my $workbook = shift; + my $header_format = shift; + my $text_format = shift; -#sub _initiate_worksheet{ -# my $workbook = $_[0]; -# my $formats = $_[1]; -# my $project = $_[2]; -# my $name = $_[3]; -# -# my $worksheet = $workbook->add_worksheet($name); -# $worksheet->set_column('A:A', 60, $formats->{text}); -# $worksheet->set_column('F:G', 30, $formats->{text}); -# #$worksheet->set_column('J:J', 100, $formats->{text}); -# $worksheet->write('A1', "$name Repeats for $project", $formats->{header}); -# $worksheet->write('A2', 'Sequence Name', $formats->{header}); -# $worksheet->write('B2', 'Motif', $formats->{header}); -# $worksheet->write('C2', '# Repeats', $formats->{header}); -# $worksheet->write('D2', 'Start', $formats->{header}); -# $worksheet->write('E2', 'End', $formats->{header}); -# $worksheet->write('F2', 'Forward Primer', $formats->{header}); -# $worksheet->write('G2', 'Reverse Primer', $formats->{header}); -# $worksheet->write('H2', 'Forward Tm', $formats->{header}); -# $worksheet->write('I2', 'Reverse Tm', $formats->{header}); -# $worksheet->write('J2', 'Fragment Size', $formats->{header}); -# #$worksheet->write('J2', 'Sequence', $formats->{header}); -# -# return $worksheet; -#} - - - #my $worksheet_stats = printStats($stats_out, $workbook, $formats, $project); - #$worksheet_stats->activate(); - #$worksheet_stats->select(); - #$workbook->close(); -#sub initiate_workbooks{ -# my $workbook = $_[0]; # file name -# my $formats = $_[1]; # file name -# my $project = $_[2]; # file name -# -# _print_worksheet($di_worksheet, $formats, $project); -# _print_worksheet($tri_worksheet, $formats, $project); -# _print_worksheet($tetra_worksheet, $formats, $project); -#} -################################################################ + my $di_worksheet = _initiate_worksheet($workbook, $header_format, $text_format, "Dinucleotides"); + my $tri_worksheet = _initiate_worksheet($workbook, $header_format, $text_format, "Trinucleotides"); + my $tetra_worksheet = _initiate_worksheet($workbook, $header_format, $text_format, "Tetranucleotides"); + + my $di_index = 3; + my $tri_index = 3; + my $tetra_index = 3; + + foreach my $ssr_id (keys %SSR_STATS){ + # for excel data files, only print SSRs + # that have primers + if($SSR_STATS{$ssr_id}{COMPOUND} == 0 && + $SSR_STATS{$ssr_id}{FORWARD} =~ /\S/ + ){ + if($SSR_STATS{$ssr_id}{MOTIF_LENGTH} == 2){ + _print_excel_file_line($di_worksheet, $di_index, $ssr_id); + $di_index++; + } + elsif($SSR_STATS{$ssr_id}{MOTIF_LENGTH} == 3){ + _print_excel_file_line($tri_worksheet, $tri_index, $ssr_id); + $tri_index++; + } + elsif($SSR_STATS{$ssr_id}{MOTIF_LENGTH} == 4){ + _print_excel_file_line($tetra_worksheet, $tetra_index, $ssr_id); + $tetra_index++; + } + } + } + + +} + +############################################################## +sub _initiate_worksheet{ + my $workbook = $_[0]; + my $header_format = $_[1]; + my $text_format = $_[2]; + my $name = $_[3]; + + my $worksheet = $workbook->add_worksheet($name); + $worksheet->set_column('A:A', 60, $text_format); + $worksheet->set_column('B:E', 10, $text_format); + $worksheet->set_column('F:G', 30, $text_format); + $worksheet->set_column('H:J', 10, $text_format); + + $worksheet->write('A1', "$name with primers", $header_format); + $worksheet->write('A2', 'SSR ID', $header_format); + $worksheet->write('B2', 'Motif', $header_format); + $worksheet->write('C2', '# Repeats', $header_format); + $worksheet->write('D2', 'Start', $header_format); + $worksheet->write('E2', 'End', $header_format); + $worksheet->write('F2', 'Forward Primer', $header_format); + $worksheet->write('G2', 'Reverse Primer', $header_format); + $worksheet->write('H2', 'Forward Tm', $header_format); + $worksheet->write('I2', 'Reverse Tm', $header_format); + $worksheet->write('J2', 'Fragment Size', $header_format); + + return $worksheet; +} -#sub _print_worksheet{ -# my $worksheet = $_[0]; -# my $formats = $_[1]; -# my $name = $_[2]; -# -# -#} - #foreach my $group (keys %MOTIFS) { - # my $motifUC = uc($motif); - # if($group =~ /\|$motifUC\|/){ - # # If this group contains this motif - # #print "Incrementing $group for $motif\n"; - # $MOTIFS{$group}++; - # } - #} -## -## $di_worksheet->write("A$di_index", $contig, $formats->{text}); -## $di_worksheet->write("B$di_index", $motif, $formats->{text}); -## $di_worksheet->write("C$di_index", $cnt, $formats->{text}); -## $di_worksheet->write("D$di_index", $ssrStart, $formats->{text}); -## $di_worksheet->write("E$di_index", $ssrEnd, $formats->{text}); -## $di_worksheet->write("F$di_index", $forward, $formats->{text}); -## $di_worksheet->write("G$di_index", $reverse, $formats->{text}); -## $di_worksheet->write("H$di_index", $left_tm, $formats->{text}); -## $di_worksheet->write("I$di_index", $right_tm, $formats->{text}); -## $di_worksheet->write("J$di_index", $product_size, $formats->{text}); -## #$di_worksheet->write("J$di_index", $seq, $formats->{text}); -## $di_index++; -## -## # Increment motif count -## foreach my $group (keys %MOTIFS) { -## my $motifUC = uc($motif); -## if($group =~ /\|$motifUC\|/){ -## # If this group contains this motif -## my $tmp = $MOTIFS{$group}++; -## $tmp++; -## $MOTIFS{$group} = $tmp; -## } -## }# end foreach $group -## } -## elsif(length $motif == 3){ -## print $tri_fh join("\t", $contig, $motif, $ssrStart, $ssrEnd, $forward, $reverse, $left_tm, $right_tm, $product_size, $seq, $seq_masked); -## print $tri_fh "\n"; -## my $tmp = $MOTIFLEN_w_PRIMERS{3}; -## $tmp++; -## $MOTIFLEN_w_PRIMERS{3} = $tmp; -## -## my $cnt = ($ssrEnd-$ssrStart+1)/3; -## $tri_worksheet->write("A$tri_index", $contig, $formats->{text}); -## $tri_worksheet->write("B$tri_index", $motif, $formats->{text}); -## $tri_worksheet->write("C$tri_index", $cnt, $formats->{text}); -## $tri_worksheet->write("D$tri_index", $ssrStart, $formats->{text}); -## $tri_worksheet->write("E$tri_index", $ssrEnd, $formats->{text}); -## $tri_worksheet->write("F$tri_index", $forward, $formats->{text}); -## $tri_worksheet->write("G$tri_index", $reverse, $formats->{text}); -## $tri_worksheet->write("H$tri_index", $left_tm, $formats->{text}); -## $tri_worksheet->write("I$tri_index", $right_tm, $formats->{text}); -## $tri_worksheet->write("J$tri_index", $product_size, $formats->{text}); -## #$tri_worksheet->write("J$tri_index", $seq, $formats->{text}); -## $tri_index++; -## } -## elsif(length $motif == 4){ -## _printLineToWorksheet(); -## } -## } -## else{ -## print $fastamulti_fh ">$contig\n$seq\n"; -## } -## } -## } -## } -## } # end while -## -## close P3O; -## -## return; -##} -## -################################################################## -##sub _printLineToWorksheet{ -## my $fh = shift; -## my $index = shift; -## my $worksheet = shift; -## -## my $contig = shift; -## my $motif = shift; -## my $ssrStart = shift; -## my $ssrEnd = shift; -## my $forward = shift; -## my $reverse = shift; -## my $left_tm = shift; -## my $right_tm = shift; -## my $product_size = shift; -## my $seq = shift; -## my $seq_masked = shift; -## -## print $fh join("\t", $contig, $motif, $ssrStart, $ssrEnd, $forward, $reverse, $left_tm, $right_tm, $product_size, $seq, $seq_masked); -## print $fh "\n"; -## my $tmp = $MOTIFLEN_w_PRIMERS{4}; -## $tmp++; -## $MOTIFLEN_w_PRIMERS{4} = $tmp; -## -## my $cnt = ($ssrEnd-$ssrStart+1)/4; -## $worksheet->write("A$index", $contig, $formats->{text}); -## $worksheet->write("B$index", $motif, $formats->{text}); -## $worksheet->write("C$index", $cnt, $formats->{text}); -## $worksheet->write("D$index", $ssrStart, $formats->{text}); -## $worksheet->write("E$index", $ssrEnd, $formats->{text}); -## $worksheet->write("F$index", $forward, $formats->{text}); -## $worksheet->write("G$index", $reverse, $formats->{text}); -## $worksheet->write("H$index", $left_tm, $formats->{text}); -## $worksheet->write("I$index", $right_tm, $formats->{text}); -## $worksheet->write("J$index", $product_size, $formats->{text}); -## $index++; -## -##} -## -# -################################################################# ################################################################ -#sub _printUsage { -# print "Usage: $0.pl "; -# print qq( -# The list of arguments includes: -# -# -f|--fasta_file -# Required. The file of the sequences to be searched. -# -# -m|--masked_file -# Required. A soft-masked version of the fasta file (soft masked means low -# complexity sequences are in lower case bases.) -# -# -p|--project "project name" -# Optional. A project name for use in the Excel output. -# -# ); -# print "\n"; -# return; -#} -# -# -#1; +sub _print_excel_file_line{ + my $worksheet = shift; + my $index = shift; + my $ssr_id = shift; + + $worksheet->write("A$index", $ssr_id); + $worksheet->write("B$index", $SSR_STATS{$ssr_id}{MOTIF}); + $worksheet->write("C$index", $SSR_STATS{$ssr_id}{NO_REPEATS}); + $worksheet->write("D$index", $SSR_STATS{$ssr_id}{START}); + $worksheet->write("E$index", $SSR_STATS{$ssr_id}{END}); + $worksheet->write("F$index", $SSR_STATS{$ssr_id}{FORWARD}); + $worksheet->write("G$index", $SSR_STATS{$ssr_id}{REVERSE}); + $worksheet->write("H$index", $SSR_STATS{$ssr_id}{LEFT_TM}); + $worksheet->write("I$index", $SSR_STATS{$ssr_id}{RIGHT_TM}); + $worksheet->write("J$index", $SSR_STATS{$ssr_id}{PRODUCT_SIZE}); + +} + +############################################################### +sub _printUsage { + print "Usage: $0.pl "; + print qq( + The list of arguments includes: + + -f|--fasta_file + Required. The file of the sequences to be searched. + + -m|--masked_file + Required. A soft-masked version of the fasta file (soft masked means low + complexity sequences are in lower case bases.) + + -p|--project "project name" + Optional. A project name for use in the Excel output. + + ); + print "\n"; + return; +} + + +1;