From 1f62583885c07adbf9dec924d3afe782fda8a74a Mon Sep 17 00:00:00 2001 From: Meg Staton Date: Mon, 10 Mar 2014 15:31:46 -0400 Subject: [PATCH] The usage instructions now make it clear that the masked file is required. Paths to primer3 changed to the new UTK location. The -s|--species option is renamed to -p|--project. --- hwg_gssr_scripts/findSSRs_post_assembly.pl | 35 ++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/hwg_gssr_scripts/findSSRs_post_assembly.pl b/hwg_gssr_scripts/findSSRs_post_assembly.pl index d70fe5b..2ebc59f 100755 --- a/hwg_gssr_scripts/findSSRs_post_assembly.pl +++ b/hwg_gssr_scripts/findSSRs_post_assembly.pl @@ -29,6 +29,10 @@ # -f|--fasta_file # Required. The file of the sequences to be searched. # +# -m|--masked_file +# Required. A soft-masked version of the fasta file (soft masked means low +# complexity sequences are in lower case bases.) +# # Output: # ------ # Eight output files are produced: @@ -118,8 +122,8 @@ #------------ # PRIMER PARAMETERS -my $PRIMER3 = "/primer3-2.3.5/src/primer3_core"; -my $PRIMER3_CONFIG = "/primer3-2.3.5/src/primer3_config/"; +my $PRIMER3 = "/lustre/projects/staton/software/primer3-2.3.6/src/primer3_core"; +my $PRIMER3_CONFIG = "/lustre/projects/staton/software/primer3-2.3.6/src/primer3_config/"; my $PRIMER_OPT_SIZE="20"; # default 20 my $PRIMER_MIN_SIZE="18"; # default 18 @@ -207,7 +211,7 @@ sub main{ my $fasta_file; my $masked_file; - my $species; + my $project; my $p3_input; my $p3_output; @@ -225,7 +229,7 @@ sub main{ Getopt::Long::Configure ('bundling'); GetOptions('f|fasta_file=s' => \$fasta_file, 'm|masked_file=s' => \$masked_file, - 's|species=s' => \$species); + 'p|project=s' => \$project); ## Check that all required parameters have been included if(!$fasta_file){ print "A fasta file is required.\n"; _printUsage(); exit;} @@ -269,12 +273,12 @@ sub main{ my $tri_fh = *TRI; my $tetra_fh = *TETRA; print "parsing primer3..."; - parseP3_output($p3_output, $di_fh, $tri_fh, $tetra_fh, $workbook, $formats, $species); + 
parseP3_output($p3_output, $di_fh, $tri_fh, $tetra_fh, $workbook, $formats, $project); print "done.\n"; close RPT; print "stats...\n"; - my $worksheet_stats = printStats($stats_out, $workbook, $formats, $species); + my $worksheet_stats = printStats($stats_out, $workbook, $formats, $project); $worksheet_stats->activate(); $worksheet_stats->select(); @@ -504,7 +508,7 @@ sub parseP3_output{ my $workbook = $_[4]; # file name my $formats = $_[5]; # file name - my $species = $_[6]; # file name + my $project = $_[6]; # file name my $start; my $seq_id; @@ -519,7 +523,7 @@ sub parseP3_output{ $di_worksheet->set_column('A:A', 60, $formats->{text}); $di_worksheet->set_column('F:G', 30, $formats->{text}); #$di_worksheet->set_column('J:J', 100, $formats->{text}); - $di_worksheet->write('A1', "Dinucleotide Repeats for $species", $formats->{header}); + $di_worksheet->write('A1', "Dinucleotide Repeats for $project", $formats->{header}); $di_worksheet->write('A2', 'Sequence Name', $formats->{header}); $di_worksheet->write('B2', 'Motif', $formats->{header}); $di_worksheet->write('C2', '# Repeats', $formats->{header}); @@ -537,7 +541,7 @@ sub parseP3_output{ $tri_worksheet->set_column('A:A', 60, $formats->{text}); $tri_worksheet->set_column('F:G', 30, $formats->{text}); #$tri_worksheet->set_column('J:J', 100, $formats->{text}); - $tri_worksheet->write('A1', "Trinucleotide Repeats for $species", $formats->{header}); + $tri_worksheet->write('A1', "Trinucleotide Repeats for $project", $formats->{header}); $tri_worksheet->write('A2', 'Sequence Name', $formats->{header}); $tri_worksheet->write('B2', 'Motif', $formats->{header}); $tri_worksheet->write('C2', '# Repeats', $formats->{header}); @@ -555,7 +559,7 @@ sub parseP3_output{ $tetra_worksheet->set_column('A:A', 60, $formats->{text}); $tetra_worksheet->set_column('F:G', 30, $formats->{text}); #$tetra_worksheet->set_column('J:J', 100, $formats->{text}); - $tetra_worksheet->write('A1', "Tetranucleotide Repeats for $species", 
$formats->{header}); + $tetra_worksheet->write('A1', "Tetranucleotide Repeats for $project", $formats->{header}); $tetra_worksheet->write('A2', 'Sequence Name', $formats->{header}); $tetra_worksheet->write('B2', 'Motif', $formats->{header}); $tetra_worksheet->write('C2', '# Repeats', $formats->{header}); @@ -741,7 +745,7 @@ sub printStats{ my $stats_out = $_[0]; # file name my $workbook = $_[1]; # file name my $formats = $_[2]; # file name - my $species = $_[3]; # file name + my $project = $_[3]; # file name ##-------------------------------------------------------------------- @@ -828,7 +832,7 @@ sub printStats{ $worksheet->set_column('A:A', 75, $formats->{text}); $worksheet->set_column('B:B', 30, $formats->{text}); - $worksheet->write('A1',"SSR Summary Report for $species", $formats->{header}); + $worksheet->write('A1',"SSR Summary Report for $project", $formats->{header}); $worksheet->write('A2',"Analsis of $SEQ_COUNT sequences", $formats->{text}); $worksheet->write('A3',"$time", $formats->{text}); @@ -958,6 +962,13 @@ sub _printUsage { -f|--fasta_file Required. The file of the sequences to be searched. + -m|--masked_file + Required. A soft-masked version of the fasta file (soft masked means low + complexity sequences are in lower case bases.) + + -p|--project "project name" + Optional. A project name for use in the Excel output. + ); print "\n"; return;