diff --git a/lib/Bio/PanGenome/Output/GroupsMultifastasNucleotide.pm b/lib/Bio/PanGenome/Output/GroupsMultifastasNucleotide.pm index ad39f45..beec362 100644 --- a/lib/Bio/PanGenome/Output/GroupsMultifastasNucleotide.pm +++ b/lib/Bio/PanGenome/Output/GroupsMultifastasNucleotide.pm @@ -28,6 +28,9 @@ has 'annotate_groups' => ( is => 'ro', isa => 'Bio::PanGenome::AnnotateGroups', has 'output_directory' => ( is => 'ro', isa => 'Str', lazy => 1, builder => '_build_output_directory'); +has '_number_of_groups' => ( is => 'rw', isa => 'Num', lazy_build => 1 ); +has '_group_limit' => ( is => 'rw', isa => 'Num', default => 8000 ); + sub _build_output_directory { my ($self) = @_; @@ -35,9 +38,22 @@ sub _build_output_directory return $output_directory; } +sub _build__number_of_groups { + my $self = shift; + + return $self->annotate_groups->_group_counter; +} + sub create_files { my ($self) = @_; - + + my $num_groups = $self->_number_of_groups; + my $limit = $self->_group_limit; + if ( $num_groups > $limit ){ + print STDERR "Number of clusters ($num_groups) exceeds limit ($limit). Multifastas not created.\n"; + return 1; + } + make_path($self->output_directory); for my $gff_file ( @{ $self->gff_files } ) diff --git a/lib/Bio/PanGenome/PostAnalysis.pm b/lib/Bio/PanGenome/PostAnalysis.pm index 13ad5e9..07314af 100644 --- a/lib/Bio/PanGenome/PostAnalysis.pm +++ b/lib/Bio/PanGenome/PostAnalysis.pm @@ -181,19 +181,19 @@ sub _delete_intermediate_files my ($self) = @_; return if($self->dont_delete_files == 1); - unlink($self->_output_mcl_filename) ; - unlink($self->_output_inflate_clusters_filename) ; - unlink($self->_output_group_labels_filename) ; - unlink($self->_output_combined_filename) ; + unlink($self->_output_mcl_filename) ; + unlink($self->_output_inflate_clusters_filename) ; + unlink($self->_output_group_labels_filename) ; + unlink($self->_output_combined_filename) ; unlink($self->clusters_filename) ; unlink($self->clusters_filename . '.clstr' ) ; unlink($self->clusters_filename . '.bak.clstr' ) ; - unlink('_gff_files') ; - unlink('_fasta_files') ; - unlink('_clustered_filtered.fa') ; - unlink($self->_input_cd_hit_groups_file) ; - unlink('database_masking.asnb') ; - unlink('_clustered') ; + unlink('_gff_files') ; + unlink('_fasta_files') ; + unlink('_clustered_filtered.fa') ; + unlink($self->_input_cd_hit_groups_file) ; + unlink('database_masking.asnb') ; + unlink('_clustered') ; } no Moose; diff --git a/t/Bio/PanGenome/Output/GroupsMultifastasNucleotide.t b/t/Bio/PanGenome/Output/GroupsMultifastasNucleotide.t index b59fa82..7584036 100644 --- a/t/Bio/PanGenome/Output/GroupsMultifastasNucleotide.t +++ b/t/Bio/PanGenome/Output/GroupsMultifastasNucleotide.t @@ -9,6 +9,7 @@ BEGIN { unshift( @INC, './lib' ) } BEGIN { use Test::Most; + use Test::Output; use_ok('Bio::PanGenome::Output::GroupsMultifastasNucleotide'); use Bio::PanGenome::AnnotateGroups; use Bio::PanGenome::AnalyseGroups; @@ -18,6 +19,7 @@ BEGIN { remove_tree('pan_genome_sequences'); my $gff_files = [ 't/data/query_1.gff', 't/data/query_2.gff','t/data/query_3.gff' ]; +my $obj; my $annotate_groups = Bio::PanGenome::AnnotateGroups->new( gff_files => $gff_files, @@ -26,10 +28,14 @@ my $annotate_groups = Bio::PanGenome::AnnotateGroups->new( $annotate_groups->reannotate; +#print Dumper $annotate_groups->_genes_to_file; +#print Dumper $annotate_groups; +#print $annotate_groups->_group_counter; + ok( - my $obj = Bio::PanGenome::Output::GroupsMultifastasNucleotide->new( - group_names => [ 'group_2', 'group_5' ], - gff_files => $gff_files, + $obj = Bio::PanGenome::Output::GroupsMultifastasNucleotide->new( + group_names => [ 'group_2', 'group_5' ], + gff_files => $gff_files, annotate_groups => $annotate_groups ), 'initialise creating multiple fastas' @@ -44,4 +50,17 @@ is(read_file('pan_genome_sequences/group_6.fa'), read_file('t/data/pan_genome_se is(read_file('pan_genome_sequences/yfnB.fa'), read_file('t/data/pan_genome_sequences/yfnB.fa' ), 'Check multifasta content is correct for 1-yfnB.fa '); remove_tree('pan_genome_sequences'); +# test group number limit +ok( + $obj = Bio::PanGenome::Output::GroupsMultifastasNucleotide->new( + group_names => [ 'group_2', 'group_5' ], + gff_files => $gff_files, + annotate_groups => $annotate_groups, + _group_limit => 4 + ), + 'initialise creating multiple fastas' +); +my $exp_stderr = "Number of clusters (8) exceeds limit (4). Multifastas not created.\n"; +stderr_is { $obj->create_files() } $exp_stderr, 'multifasta creation fails when group limit exceeded'; + done_testing();