Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge Carlas CGN #93

Merged
merged 18 commits into from
Mar 12, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions AUTHORS
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
Andrew J. Page (ap13@sanger.ac.uk)
Carla A. Cummins (cc21@sanger.ac.uk)
2 changes: 1 addition & 1 deletion bin/create_pan_genome
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ Create a pan genome from a set of proteome FASTA files

BEGIN { unshift( @INC, '../lib' ) }
BEGIN { unshift( @INC, './lib' ) }
# BEGIN { unshift( @INC, '/software/pathogen/internal/prod/lib/' ) }
#BEGIN { unshift( @INC, '/software/pathogen/internal/prod/lib/' ) }
use Bio::PanGenome::CommandLine::CreatePanGenome;

Bio::PanGenome::CommandLine::CreatePanGenome->new(args => \@ARGV, script_name => $0)->run;
2 changes: 1 addition & 1 deletion bin/extract_proteome_from_gff
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ Take in GFF files and output the proteome

BEGIN { unshift( @INC, '../lib' ) }
BEGIN { unshift( @INC, './lib' ) }
BEGIN { unshift( @INC, '/software/pathogen/internal/prod/lib/' ) }
#BEGIN { unshift( @INC, '/software/pathogen/internal/prod/lib/' ) }
use Bio::PanGenome::CommandLine::ExtractProteomeFromGff;

Bio::PanGenome::CommandLine::ExtractProteomeFromGff->new(args => \@ARGV, script_name => $0)->run;
2 changes: 1 addition & 1 deletion bin/iterative_cdhit
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ Iteratively run cdhit

BEGIN { unshift( @INC, '../lib' ) }
BEGIN { unshift( @INC, './lib' ) }
BEGIN { unshift( @INC, '/software/pathogen/internal/prod/lib/' ) }
#BEGIN { unshift( @INC, '/software/pathogen/internal/prod/lib/' ) }
use Bio::PanGenome::CommandLine::IterativeCdhit;

Bio::PanGenome::CommandLine::IterativeCdhit->new(args => \@ARGV, script_name => $0)->run;
2 changes: 1 addition & 1 deletion bin/merge_multifasta_alignments
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ Take in a list of alignment files with equal numbers of sequences and merge them

BEGIN { unshift( @INC, '../lib' ) }
BEGIN { unshift( @INC, './lib' ) }
BEGIN { unshift( @INC, '/software/pathogen/internal/prod/lib/' ) }
#BEGIN { unshift( @INC, '/software/pathogen/internal/prod/lib/' ) }
use Bio::PanGenome::CommandLine::MergeMultipleFastaAlignments;

Bio::PanGenome::CommandLine::MergeMultipleFastaAlignments->new(args => \@ARGV, script_name => $0)->run;
2 changes: 1 addition & 1 deletion bin/pan_genome_core_alignment
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ package Bio::PanGenome::Main::PanGenomeCoreAlignment;

BEGIN { unshift( @INC, '../lib' ) }
BEGIN { unshift( @INC, './lib' ) }
BEGIN { unshift( @INC, '/software/pathogen/internal/prod/lib/' ) }
#BEGIN { unshift( @INC, '/software/pathogen/internal/prod/lib/' ) }
use Bio::PanGenome::CommandLine::PanGenomeCoreAlignment;

Bio::PanGenome::CommandLine::PanGenomeCoreAlignment->new(args => \@ARGV, script_name => $0)->run;
2 changes: 1 addition & 1 deletion bin/pan_genome_post_analysis
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ Perform the post analysis on the pan genome

BEGIN { unshift( @INC, '../lib' ) }
BEGIN { unshift( @INC, './lib' ) }
BEGIN { unshift( @INC, '/software/pathogen/internal/prod/lib/' ) }
#BEGIN { unshift( @INC, '/software/pathogen/internal/prod/lib/' ) }
use Bio::PanGenome::CommandLine::PanGenomePostAnalysis;

Bio::PanGenome::CommandLine::PanGenomePostAnalysis->new(args => \@ARGV, script_name => $0)->run;
2 changes: 1 addition & 1 deletion bin/pan_genome_reorder_spreadsheet
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ Take in a tree and a spreadsheet and output a reordered spreadsheet

BEGIN { unshift( @INC, '../lib' ) }
BEGIN { unshift( @INC, './lib' ) }
BEGIN { unshift( @INC, '/software/pathogen/internal/prod/lib/' ) }
#BEGIN { unshift( @INC, '/software/pathogen/internal/prod/lib/' ) }
use Bio::PanGenome::CommandLine::PanGenomeReorderSpreadsheet;

Bio::PanGenome::CommandLine::PanGenomeReorderSpreadsheet->new(args => \@ARGV, script_name => $0)->run;
2 changes: 1 addition & 1 deletion bin/parallel_all_against_all_blastp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ Take in a FASTA file of proteins and blast against itself

BEGIN { unshift( @INC, '../lib' ) }
BEGIN { unshift( @INC, './lib' ) }
BEGIN { unshift( @INC, '/software/pathogen/internal/prod/lib/' ) }
#BEGIN { unshift( @INC, '/software/pathogen/internal/prod/lib/' ) }
use Bio::PanGenome::CommandLine::ParallelAllAgainstAllBlastp;

Bio::PanGenome::CommandLine::ParallelAllAgainstAllBlastp->new(args => \@ARGV, script_name => $0)->run;
2 changes: 1 addition & 1 deletion bin/protein_muscle_alignment_from_nucleotides
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ Take in a multifasta file of nucleotides, convert to proteins and align with mus

BEGIN { unshift( @INC, '../lib' ) }
BEGIN { unshift( @INC, './lib' ) }
BEGIN { unshift( @INC, '/software/pathogen/internal/prod/lib/' ) }
#BEGIN { unshift( @INC, '/software/pathogen/internal/prod/lib/' ) }
use Bio::PanGenome::CommandLine::ProteinMuscleAlignmentFromNucleotides;

Bio::PanGenome::CommandLine::ProteinMuscleAlignmentFromNucleotides->new(args => \@ARGV, script_name => $0)->run;
2 changes: 1 addition & 1 deletion bin/query_pan_genome
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ Take in a groups file and the protein fasta files and output selected data

BEGIN { unshift( @INC, '../lib' ) }
BEGIN { unshift( @INC, './lib' ) }
BEGIN { unshift( @INC, '/software/pathogen/internal/prod/lib/' ) }
#BEGIN { unshift( @INC, '/software/pathogen/internal/prod/lib/' ) }
use Bio::PanGenome::CommandLine::QueryPanGenome;

Bio::PanGenome::CommandLine::QueryPanGenome->new(args => \@ARGV, script_name => $0)->run;
19 changes: 19 additions & 0 deletions bin/roary
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/usr/bin/env perl

package Bio::PanGenome::Main::Roary;

use strict;
use warnings;

# ABSTRACT: Create a pan genome from a set of proteome FASTA files
# PODNAME: roary

=head1 SYNOPSIS

Create a pan genome from a set of proteome FASTA files

=cut

# Prepend the relative library directories to the module search path. Both
# paths are resolved against the current working directory, so '../lib' and
# './lib' are each tried before the default @INC entries.
BEGIN { unshift( @INC, '../lib' ) }
BEGIN { unshift( @INC, './lib' ) }
#BEGIN { unshift( @INC, '/software/pathogen/internal/prod/lib/' ) }
use Bio::PanGenome::CommandLine::Roary;

# Hand the raw command-line arguments and the invoked script name to the
# command-line wrapper class and run it.
Bio::PanGenome::CommandLine::Roary->new(args => \@ARGV, script_name => $0)->run;
2 changes: 1 addition & 1 deletion bin/transfer_annotation_to_groups
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ Take in a groups file and a set of GFF files and transfer the consensus annotati

BEGIN { unshift( @INC, '../lib' ) }
BEGIN { unshift( @INC, './lib' ) }
BEGIN { unshift( @INC, '/software/pathogen/internal/prod/lib/' ) }
#BEGIN { unshift( @INC, '/software/pathogen/internal/prod/lib/' ) }
use Bio::PanGenome::CommandLine::TransferAnnotationToGroups;

Bio::PanGenome::CommandLine::TransferAnnotationToGroups->new(args => \@ARGV, script_name => $0)->run;
6 changes: 5 additions & 1 deletion lib/Bio/PanGenome.pm
Original file line number Diff line number Diff line change
Expand Up @@ -31,17 +31,19 @@ has 'output_filename' => ( is => 'rw', isa => 'Str', default =
has 'output_pan_geneome_filename' => ( is => 'rw', isa => 'Str', default => 'pan_genome.fa' );
has 'output_statistics_filename' => ( is => 'rw', isa => 'Str', default => 'gene_presence_absence.csv' );
has 'job_runner' => ( is => 'rw', isa => 'Str', default => 'LSF' );
has 'cpus' => ( is => 'ro', isa => 'Int', default => 1 );
has 'cpus' => ( is => 'ro', isa => 'Int', default => 1 );
has 'makeblastdb_exec' => ( is => 'rw', isa => 'Str', default => 'makeblastdb' );
has 'blastp_exec' => ( is => 'rw', isa => 'Str', default => 'blastp' );
has 'mcxdeblast_exec' => ( is => 'ro', isa => 'Str', default => 'mcxdeblast' );
has 'mcl_exec' => ( is => 'ro', isa => 'Str', default => 'mcl' );
has 'perc_identity' => ( is => 'ro', isa => 'Num', default => 98 );
has 'dont_delete_files' => ( is => 'ro', isa => 'Bool', default => 0 );
has 'dont_create_rplots' => ( is => 'rw', isa => 'Bool', default => 0 );
has 'dont_split_groups' => ( is => 'ro', isa => 'Bool', default => 0 );
has 'verbose_stats' => ( is => 'rw', isa => 'Bool', default => 0 );
has 'translation_table' => ( is => 'rw', isa => 'Int', default => 11 );
has 'group_limit' => ( is => 'rw', isa => 'Num', default => 50000 );
has 'core_definition' => ( is => 'rw', isa => 'Num', default => 1.0 );

has 'output_multifasta_files' => ( is => 'ro', isa => 'Bool', default => 0 );

Expand Down Expand Up @@ -119,9 +121,11 @@ sub run {
output_multifasta_files => $self->output_multifasta_files,
dont_delete_files => $self->dont_delete_files,
dont_create_rplots => $self->dont_create_rplots,
dont_split_groups => $self->dont_split_groups,
verbose_stats => $self->verbose_stats,
translation_table => $self->translation_table,
group_limit => $self->group_limit,
core_definition => $self->core_definition,
);
$post_analysis->run();

Expand Down
2 changes: 1 addition & 1 deletion lib/Bio/PanGenome/AnalyseGroups.pm
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ sub BUILD {
my ($self) = @_;
# This triggers _genes_to_groups to be built
$self->_groups_to_genes;
# This triggers _genes_to_file to be buit
# This triggers _genes_to_file to be built
$self->_files_to_genes;
$self->_freq_groups_per_genome;
}
Expand Down
6 changes: 4 additions & 2 deletions lib/Bio/PanGenome/AnnotateGroups.pm
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ Take in a group file and assosiated GFF files for the isolates and update the gr
use Moose;
use Bio::PanGenome::Exceptions;
use Bio::PanGenome::GeneNamesFromGFF;
use Data::Dumper;
use Array::Utils qw(array_minus);

use File::Grep qw(fgrep);

Expand Down Expand Up @@ -174,6 +176,7 @@ sub _builder__groups_to_id_names {
$groups_to_id_names{$group_name} = \@elements;
}
}

return \%groups_to_id_names;
}

Expand Down Expand Up @@ -259,8 +262,7 @@ sub _split_groups {
sub _remove_ids_from_group {
my ( $self, $ids_to_remove, $group ) = @_;

my @remaining_ids =
grep { not $_ ~~ @{$ids_to_remove} } @{ $self->_groups_to_id_names->{$group} };
my @remaining_ids = array_minus( @{ $self->_groups_to_id_names->{$group} }, @{ $ids_to_remove } );
$self->_groups_to_id_names->{$group} = \@remaining_ids;
if ( @{ $self->_groups_to_id_names->{$group} } == 0 ) {
delete( $self->_groups_to_id_names->{$group} );
Expand Down
Loading