Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

rename spreadsheet #92

Merged
merged 1 commit on Mar 6, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion lib/Bio/PanGenome.pm
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ has 'fasta_files' => ( is => 'rw', isa => 'ArrayRef', required =
has 'input_files' => ( is => 'rw', isa => 'ArrayRef', required => 1 );
has 'output_filename' => ( is => 'rw', isa => 'Str', default => 'clustered_proteins' );
has 'output_pan_geneome_filename' => ( is => 'rw', isa => 'Str', default => 'pan_genome.fa' );
has 'output_statistics_filename' => ( is => 'rw', isa => 'Str', default => 'group_statisics.csv' );
has 'output_statistics_filename' => ( is => 'rw', isa => 'Str', default => 'gene_presence_absence.csv' );
has 'job_runner' => ( is => 'rw', isa => 'Str', default => 'LSF' );
has 'cpus' => ( is => 'ro', isa => 'Int', default => 1 );
has 'makeblastdb_exec' => ( is => 'rw', isa => 'Str', default => 'makeblastdb' );
Expand Down
4 changes: 2 additions & 2 deletions lib/Bio/PanGenome/CommandLine/PanGenomeCoreAlignment.pm
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ has 'script_name' => ( is => 'ro', isa => 'Str', required => 1 );
has 'help' => ( is => 'rw', isa => 'Bool', default => 0 );

has 'multifasta_base_directory' => ( is => 'rw', isa => 'Str', default => 'pan_genome_sequences' );
has 'spreadsheet_filename' => ( is => 'rw', isa => 'Str', default => 'group_statisics.csv' );
has 'spreadsheet_filename' => ( is => 'rw', isa => 'Str', default => 'gene_presence_absence.csv' );
has 'output_filename' => ( is => 'rw', isa => 'Str', default => 'core_gene_alignment.aln' );
has '_error_message' => ( is => 'rw', isa => 'Str' );

Expand Down Expand Up @@ -94,7 +94,7 @@ sub usage_text {
pan_genome_core_alignment

# Specify the directory containing the multifastas (-m), the spreadsheet (-s) and an output file name (-o)
pan_genome_core_alignment -m pan_genome_sequences -s group_statisics.csv -o output_alignment.aln
pan_genome_core_alignment -m pan_genome_sequences -s gene_presence_absence.csv -o output_alignment.aln

# This help message
pan_genome_core_alignment -h
Expand Down
2 changes: 1 addition & 1 deletion lib/Bio/PanGenome/CommandLine/PanGenomePostAnalysis.pm
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ has 'fasta_files' => ( is => 'rw', isa => 'Str' );
has 'input_files' => ( is => 'rw', isa => 'Str');
has 'output_filename' => ( is => 'rw', isa => 'Str', default => 'clustered_proteins' );
has 'output_pan_geneome_filename' => ( is => 'rw', isa => 'Str', default => 'pan_genome.fa' );
has 'output_statistics_filename' => ( is => 'rw', isa => 'Str', default => 'group_statisics.csv' );
has 'output_statistics_filename' => ( is => 'rw', isa => 'Str', default => 'gene_presence_absence.csv' );
has 'output_multifasta_files' => ( is => 'rw', isa => 'Bool', default => 0 );
has 'clusters_filename' => ( is => 'rw', isa => 'Str' );
has 'job_runner' => ( is => 'rw', isa => 'Str', default => 'LSF' );
Expand Down
2 changes: 1 addition & 1 deletion lib/Bio/PanGenome/PostAnalysis.pm
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ has 'fasta_files' => ( is => 'rw', isa => 'ArrayRef', required =
has 'input_files' => ( is => 'rw', isa => 'ArrayRef', required => 1 );
has 'output_filename' => ( is => 'rw', isa => 'Str', default => 'clustered_proteins' );
has 'output_pan_geneome_filename' => ( is => 'rw', isa => 'Str', default => 'pan_genome.fa' );
has 'output_statistics_filename' => ( is => 'rw', isa => 'Str', default => 'group_statisics.csv' );
has 'output_statistics_filename' => ( is => 'rw', isa => 'Str', default => 'gene_presence_absence.csv' );
has 'output_multifasta_files' => ( is => 'ro', isa => 'Bool', default => 0 );

has 'clusters_filename' => ( is => 'rw', isa => 'Str', required => 1 );
Expand Down
10 changes: 5 additions & 5 deletions t/Bio/PanGenome/CommandLine/CreatePanGenome.t
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,15 @@ system('touch empty_file');
' -j Local t/data/query_1.gff t/data/query_2.gff t/data/query_5.gff ' =>
[ 'clustered_proteins', 't/data/clustered_proteins_pan_genome' ],
' -j Local t/data/query_1.gff t/data/query_2.gff t/data/query_5.gff ' =>
[ 'group_statisics.csv', 't/data/overall_group_statisics.csv' ],
[ 'gene_presence_absence.csv', 't/data/overall_gene_presence_absence.csv' ],
' -t 1 -j Local t/data/query_1.gff t/data/query_2.gff t/data/query_5.gff ' =>
[ 'group_statisics.csv', 't/data/overall_group_statisics.csv' ],
[ 'gene_presence_absence.csv', 't/data/overall_gene_presence_absence.csv' ],
' -j Parallel t/data/query_1.gff t/data/query_2.gff t/data/query_5.gff ' =>
[ 'clustered_proteins', 't/data/clustered_proteins_pan_genome' ],
' -j Parallel t/data/query_1.gff t/data/query_2.gff t/data/query_5.gff ' =>
[ 'group_statisics.csv', 't/data/overall_group_statisics.csv' ],
[ 'gene_presence_absence.csv', 't/data/overall_gene_presence_absence.csv' ],
' -t 1 -j Parallel t/data/query_1.gff t/data/query_2.gff t/data/query_5.gff ' =>
[ 'group_statisics.csv', 't/data/overall_group_statisics.csv' ],
[ 'gene_presence_absence.csv', 't/data/overall_gene_presence_absence.csv' ],
'-h' =>
[ 'empty_file', 't/data/empty_file' ],
);
Expand Down Expand Up @@ -71,7 +71,7 @@ sub cleanup_files
unlink('example_1.faa.tmp.filtered.fa');
unlink('example_2.faa.tmp.filtered.fa');
unlink('example_3.faa.tmp.filtered.fa');
unlink('group_statisics.csv');
unlink('gene_presence_absence.csv');
unlink('query_1.gff.proteome.faa');
unlink('query_2.gff.proteome.faa');
unlink('query_3.gff.proteome.faa');
Expand Down
10 changes: 5 additions & 5 deletions t/Bio/PanGenome/CommandLine/PanGenomePostAnalysis.t
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ local $ENV{PATH} = "$ENV{PATH}:./bin";
system('cp t/data/post_analysis/* .');
system('touch empty_file');
my %scripts_and_expected_files = (
'-o clustered_proteins -p pan_genome.fa -s group_statisics.csv -c _clustered.clstr -i _gff_files -f _fasta_files -j Local --dont_create_rplots' =>
'-o clustered_proteins -p pan_genome.fa -s gene_presence_absence.csv -c _clustered.clstr -i _gff_files -f _fasta_files -j Local --dont_create_rplots' =>
[ 'clustered_proteins', 't/data/clustered_proteins_post_analysis' ],
'-h' =>
[ 'empty_file', 't/data/empty_file' ],
Expand All @@ -33,7 +33,7 @@ ok( -e 'number_of_unique_genes.Rtab', 'number_of_unique_genes.Rtab exists');
ok( -e 'number_of_new_genes.Rtab', 'number_of_new_genes exists');
ok( -e 'number_of_genes_in_pan_genome.Rtab', 'number_of_genes_in_pan_genome exists');
ok( -e 'number_of_conserved_genes.Rtab','number_of_conserved_genes');
ok( -e 'group_statisics.csv', 'group_statisics exists');
ok( -e 'gene_presence_absence.csv', 'gene_presence_absence exists');
ok( -e 'core_accessory.tab', 'core_accessory.tab exists');
ok( -e 'core_accessory.header.embl','core_accessory.header.embl exists');
ok( -e 'accessory.tab','accessory.tab exists');
Expand All @@ -49,7 +49,7 @@ cleanup_files();
system('cp t/data/post_analysis/* .');
system('touch empty_file');
%scripts_and_expected_files = (
'-t 1 -o clustered_proteins -p pan_genome.fa -s group_statisics.csv -c _clustered.clstr -i _gff_files -f _fasta_files -j Local --dont_create_rplots' =>
'-t 1 -o clustered_proteins -p pan_genome.fa -s gene_presence_absence.csv -c _clustered.clstr -i _gff_files -f _fasta_files -j Local --dont_create_rplots' =>
[ 'clustered_proteins', 't/data/clustered_proteins_post_analysis' ],
'-h' =>
[ 'empty_file', 't/data/empty_file' ],
Expand All @@ -61,7 +61,7 @@ ok( -e 'number_of_unique_genes.Rtab', 'number_of_unique_genes.Rtab exists');
ok( -e 'number_of_new_genes.Rtab', 'number_of_new_genes exists');
ok( -e 'number_of_genes_in_pan_genome.Rtab', 'number_of_genes_in_pan_genome exists');
ok( -e 'number_of_conserved_genes.Rtab','number_of_conserved_genes');
ok( -e 'group_statisics.csv', 'group_statisics exists');
ok( -e 'gene_presence_absence.csv', 'gene_presence_absence exists');
ok( -e 'core_accessory.tab', 'core_accessory.tab exists');
ok( -e 'core_accessory.header.embl','core_accessory.header.embl exists');
ok( -e 'accessory.tab','accessory.tab exists');
Expand Down Expand Up @@ -93,7 +93,7 @@ sub cleanup_files
unlink('accessory.tab');
unlink('core_accessory.header.embl');
unlink('core_accessory.tab');
unlink('group_statisics.csv');
unlink('gene_presence_absence.csv');
unlink('number_of_unique_genes.Rtab');
unlink('number_of_new_genes.Rtab');
unlink('number_of_genes_in_pan_genome.Rtab');
Expand Down
2 changes: 1 addition & 1 deletion t/Bio/PanGenome/CommandLine/QueryPanGenome.t
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ mock_execute_script_and_check_output( $script_name, \%scripts_and_expected_files
unlink('set_difference_unique_set_two') if ( -e 'set_difference_unique_set_two' );
unlink('set_difference_common_set') if ( -e 'set_difference_common_set' );
unlink('pan_genome_results_group_5.fa') if ( -e 'pan_genome_results_group_5.fa' );
unlink('group_statisics.csv') if ( -e 'group_statisics.csv' );
unlink('gene_presence_absence.csv') if ( -e 'gene_presence_absence.csv' );
unlink('set_difference_unique_set_two_statistics.csv') if ( -e 'set_difference_unique_set_two_statistics.csv' );
unlink('set_difference_unique_set_one_statistics.csv') if ( -e 'set_difference_unique_set_one_statistics.csv' );
unlink('set_difference_common_set_statistics.csv') if ( -e 'set_difference_common_set_statistics.csv' );
Expand Down
22 changes: 22 additions & 0 deletions t/data/overall_gene_presence_absence.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
"Gene","Non-unique Gene name","Annotation","No. isolates","No. sequences","Avg sequences per isolate","Genome Fragment","Order within Fragment","Accessory Fragement","Accessory Order with Fragment","QC","query_1","query_2","query_5"
"hly","","Alpha-toxin","2","2","1","1","13",,,"","1_1","2_1",""
"group_14","","Gonococcal growth inhibitor III","2","2","1","1","2",,,"","1_6","2_7",""
"speH","","hypothetical protein","2","2","1","1","10",,,"","1_2","2_2",""
"argF","","Ornithine carbamoyltransferase","2","2","1","1","8",,,"","1_3","2_3",""
"group_10","","hypothetical protein","1","2","2","1","6",,,"","","abc_00010 abc_00010",""
"group_11","","C4-dicarboxylate transporter/malic acid transport protein","1","2","2","1","5",,,"","","abc_00011 abc_00011",""
"group_12","","hypothetical protein","1","2","2","1","4",,,"","","abc_00012 abc_00012",""
"group_13","","Gonococcal growth inhibitor III","1","2","2","1","3",,,"","","abc_00014 abc_00014",""
"yfnB","","Putative HAD-hydrolase yfnB","1","2","2","1","1",,,"","","abc_00016 abc_00016",""
"group_16","","hypothetical protein","1","1","1","2","3","1","6","","","","abc_50002"
"group_17","argF","Ornithine carbamoyltransferase","1","1","1","2","4","1","5","","","","3_3"
"group_18","","hypothetical protein","1","1","1","2","5","1","4","","","","abc_50010"
"group_19","","hypothetical protein","1","1","1","2","6","1","3","","","","abc_50012"
"group_2","","hypothetical protein","1","2","2","1","14",,,"","","abc_00002 abc_00002",""
"group_20","","Gonococcal growth inhibitor III","1","1","1","2","2","1","2","","","","abc_50014"
"group_21","yfnB","Putative HAD-hydrolase yfnB","1","1","1","2","1","1","1","","","","3_5"
"group_3","","hypothetical protein","1","2","2","1","12",,,"","","abc_00003 abc_00003",""
"group_4","","superantigen-like protein","1","2","2","1","11",,,"","","abc_00004 abc_00004",""
"group_6","","superantigen-like protein","1","2","2","1","9",,,"","","abc_00006 abc_00006",""
"arcC1","","Carbamate kinase 1","1","2","2","1","7",,,"","","abc_00008 abc_00008",""
"group_9","","","1","2","2","","","","","","","abc_01705 abc_01705",""
14 changes: 14 additions & 0 deletions t/data/post_analysis_expected/gene_presence_absence.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
"Gene","Non-unique Gene name","Annotation","No. isolates","No. sequences","Avg sequences per isolate","Genome Fragment","Order within Fragment","Accessory Fragement","Accessory Order with Fragment","QC","query_1","query_2","query_6"
"group_5","","Gonococcal growth inhibitor III","2","2","1","1","2",,,"","1_6","2_7",""
"hly","","Alpha-toxin","2","2","1","1","12",,,"","1_1","2_1",""
"speH","","hypothetical protein","2","2","1","1","8",,,"","1_2","2_2",""
"argF","","Ornithine carbamoyltransferase","2","2","1","1","6",,,"","1_3","2_3",""
"group_12","","hypothetical protein","1","2","2","1","11",,,"","","","abc_00002 abc_00002"
"group_13","","hypothetical protein","1","2","2","1","10",,,"","","abc_00003 abc_00003",""
"group_6","","","1","2","2","","","","","","","abc_01705 abc_01705",""
"group_8","","C4-dicarboxylate transporter/malic acid transport protein","1","2","2","1","3",,,"","","abc_00011 abc_00011",""
"group_2","","superantigen-like protein","1","2","2","1","9",,,"","","abc_00004 abc_00004",""
"group_3","","superantigen-like protein","1","2","2","1","7",,,"","","abc_00006 abc_00006",""
"yfnB","","Putative HAD-hydrolase yfnB","1","2","2","1","1",,,"","","abc_00016 abc_00016",""
"group_7","","hypothetical protein","1","2","2","1","4",,,"","","abc_00010 abc_00010",""
"arcC1","","Carbamate kinase 1","1","2","2","1","5",,,"","","abc_00008 abc_00008",""