Skip to content

Commit

Permalink
Merge pull request #93 from andrewjpage/merge_carla
Browse files Browse the repository at this point in the history
Merge Carlas CGN
  • Loading branch information
andrewjpage committed Mar 12, 2015
2 parents cbac53b + 4226fb4 commit bc683d3
Show file tree
Hide file tree
Showing 57 changed files with 1,280 additions and 282 deletions.
1 change: 1 addition & 0 deletions AUTHORS
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
Andrew J. Page (ap13@sanger.ac.uk)
Carla A. Cummins (cc21@sanger.ac.uk)
2 changes: 1 addition & 1 deletion bin/create_pan_genome
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ Create a pan genome from a set of proteome FASTA files

BEGIN { unshift( @INC, '../lib' ) }
BEGIN { unshift( @INC, './lib' ) }
# BEGIN { unshift( @INC, '/software/pathogen/internal/prod/lib/' ) }
#BEGIN { unshift( @INC, '/software/pathogen/internal/prod/lib/' ) }
use Bio::PanGenome::CommandLine::CreatePanGenome;

Bio::PanGenome::CommandLine::CreatePanGenome->new(args => \@ARGV, script_name => $0)->run;
2 changes: 1 addition & 1 deletion bin/extract_proteome_from_gff
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ Take in GFF files and output the proteome

BEGIN { unshift( @INC, '../lib' ) }
BEGIN { unshift( @INC, './lib' ) }
BEGIN { unshift( @INC, '/software/pathogen/internal/prod/lib/' ) }
#BEGIN { unshift( @INC, '/software/pathogen/internal/prod/lib/' ) }
use Bio::PanGenome::CommandLine::ExtractProteomeFromGff;

Bio::PanGenome::CommandLine::ExtractProteomeFromGff->new(args => \@ARGV, script_name => $0)->run;
2 changes: 1 addition & 1 deletion bin/iterative_cdhit
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ Iteratively run cdhit

BEGIN { unshift( @INC, '../lib' ) }
BEGIN { unshift( @INC, './lib' ) }
BEGIN { unshift( @INC, '/software/pathogen/internal/prod/lib/' ) }
#BEGIN { unshift( @INC, '/software/pathogen/internal/prod/lib/' ) }
use Bio::PanGenome::CommandLine::IterativeCdhit;

Bio::PanGenome::CommandLine::IterativeCdhit->new(args => \@ARGV, script_name => $0)->run;
2 changes: 1 addition & 1 deletion bin/merge_multifasta_alignments
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ Take in a list of alignment files with equal numbers of sequences and merge them

BEGIN { unshift( @INC, '../lib' ) }
BEGIN { unshift( @INC, './lib' ) }
BEGIN { unshift( @INC, '/software/pathogen/internal/prod/lib/' ) }
#BEGIN { unshift( @INC, '/software/pathogen/internal/prod/lib/' ) }
use Bio::PanGenome::CommandLine::MergeMultipleFastaAlignments;

Bio::PanGenome::CommandLine::MergeMultipleFastaAlignments->new(args => \@ARGV, script_name => $0)->run;
2 changes: 1 addition & 1 deletion bin/pan_genome_core_alignment
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ package Bio::PanGenome::Main::PanGenomeCoreAlignment;

BEGIN { unshift( @INC, '../lib' ) }
BEGIN { unshift( @INC, './lib' ) }
BEGIN { unshift( @INC, '/software/pathogen/internal/prod/lib/' ) }
#BEGIN { unshift( @INC, '/software/pathogen/internal/prod/lib/' ) }
use Bio::PanGenome::CommandLine::PanGenomeCoreAlignment;

Bio::PanGenome::CommandLine::PanGenomeCoreAlignment->new(args => \@ARGV, script_name => $0)->run;
2 changes: 1 addition & 1 deletion bin/pan_genome_post_analysis
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ Perform the post analysis on the pan genome

BEGIN { unshift( @INC, '../lib' ) }
BEGIN { unshift( @INC, './lib' ) }
BEGIN { unshift( @INC, '/software/pathogen/internal/prod/lib/' ) }
#BEGIN { unshift( @INC, '/software/pathogen/internal/prod/lib/' ) }
use Bio::PanGenome::CommandLine::PanGenomePostAnalysis;

Bio::PanGenome::CommandLine::PanGenomePostAnalysis->new(args => \@ARGV, script_name => $0)->run;
2 changes: 1 addition & 1 deletion bin/pan_genome_reorder_spreadsheet
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ Take in a tree and a spreadsheet and output a reordered spreadsheet

BEGIN { unshift( @INC, '../lib' ) }
BEGIN { unshift( @INC, './lib' ) }
BEGIN { unshift( @INC, '/software/pathogen/internal/prod/lib/' ) }
#BEGIN { unshift( @INC, '/software/pathogen/internal/prod/lib/' ) }
use Bio::PanGenome::CommandLine::PanGenomeReorderSpreadsheet;

Bio::PanGenome::CommandLine::PanGenomeReorderSpreadsheet->new(args => \@ARGV, script_name => $0)->run;
2 changes: 1 addition & 1 deletion bin/parallel_all_against_all_blastp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ Take in a FASTA file of proteins and blast against itself

BEGIN { unshift( @INC, '../lib' ) }
BEGIN { unshift( @INC, './lib' ) }
BEGIN { unshift( @INC, '/software/pathogen/internal/prod/lib/' ) }
#BEGIN { unshift( @INC, '/software/pathogen/internal/prod/lib/' ) }
use Bio::PanGenome::CommandLine::ParallelAllAgainstAllBlastp;

Bio::PanGenome::CommandLine::ParallelAllAgainstAllBlastp->new(args => \@ARGV, script_name => $0)->run;
2 changes: 1 addition & 1 deletion bin/protein_muscle_alignment_from_nucleotides
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ Take in a multifasta file of nucleotides, convert to proteins and align with mus

BEGIN { unshift( @INC, '../lib' ) }
BEGIN { unshift( @INC, './lib' ) }
BEGIN { unshift( @INC, '/software/pathogen/internal/prod/lib/' ) }
#BEGIN { unshift( @INC, '/software/pathogen/internal/prod/lib/' ) }
use Bio::PanGenome::CommandLine::ProteinMuscleAlignmentFromNucleotides;

Bio::PanGenome::CommandLine::ProteinMuscleAlignmentFromNucleotides->new(args => \@ARGV, script_name => $0)->run;
2 changes: 1 addition & 1 deletion bin/query_pan_genome
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ Take in a groups file and the protein fasta files and output selected data

BEGIN { unshift( @INC, '../lib' ) }
BEGIN { unshift( @INC, './lib' ) }
BEGIN { unshift( @INC, '/software/pathogen/internal/prod/lib/' ) }
#BEGIN { unshift( @INC, '/software/pathogen/internal/prod/lib/' ) }
use Bio::PanGenome::CommandLine::QueryPanGenome;

Bio::PanGenome::CommandLine::QueryPanGenome->new(args => \@ARGV, script_name => $0)->run;
19 changes: 19 additions & 0 deletions bin/roary
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/usr/bin/env perl

package Bio::PanGenome::Main::Roary;

# ABSTRACT: Create a pan genome from a set of proteome FASTA files
# PODNAME: roary

=head1 SYNOPSIS

Create a pan genome from a set of proteome FASTA files

=cut

use strict;
use warnings;

# BEGIN blocks run at compile time, so @INC is extended before the `use`
# below is resolved. './lib' is unshifted last and is therefore searched first,
# followed by '../lib', then the default module search path.
BEGIN { unshift( @INC, '../lib' ) }
BEGIN { unshift( @INC, './lib' ) }
# Site-specific library path, left disabled.
#BEGIN { unshift( @INC, '/software/pathogen/internal/prod/lib/' ) }
use Bio::PanGenome::CommandLine::Roary;

# Hand the raw command-line arguments and the invoked script name to the
# command-line class and run it.
Bio::PanGenome::CommandLine::Roary->new(args => \@ARGV, script_name => $0)->run;
2 changes: 1 addition & 1 deletion bin/transfer_annotation_to_groups
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ Take in a groups file and a set of GFF files and transfer the consensus annotati

BEGIN { unshift( @INC, '../lib' ) }
BEGIN { unshift( @INC, './lib' ) }
BEGIN { unshift( @INC, '/software/pathogen/internal/prod/lib/' ) }
#BEGIN { unshift( @INC, '/software/pathogen/internal/prod/lib/' ) }
use Bio::PanGenome::CommandLine::TransferAnnotationToGroups;

Bio::PanGenome::CommandLine::TransferAnnotationToGroups->new(args => \@ARGV, script_name => $0)->run;
6 changes: 5 additions & 1 deletion lib/Bio/PanGenome.pm
Original file line number Diff line number Diff line change
Expand Up @@ -31,17 +31,19 @@ has 'output_filename' => ( is => 'rw', isa => 'Str', default =
has 'output_pan_geneome_filename' => ( is => 'rw', isa => 'Str', default => 'pan_genome.fa' );
has 'output_statistics_filename' => ( is => 'rw', isa => 'Str', default => 'gene_presence_absence.csv' );
has 'job_runner' => ( is => 'rw', isa => 'Str', default => 'LSF' );
has 'cpus' => ( is => 'ro', isa => 'Int', default => 1 );
has 'cpus' => ( is => 'ro', isa => 'Int', default => 1 );
has 'makeblastdb_exec' => ( is => 'rw', isa => 'Str', default => 'makeblastdb' );
has 'blastp_exec' => ( is => 'rw', isa => 'Str', default => 'blastp' );
has 'mcxdeblast_exec' => ( is => 'ro', isa => 'Str', default => 'mcxdeblast' );
has 'mcl_exec' => ( is => 'ro', isa => 'Str', default => 'mcl' );
has 'perc_identity' => ( is => 'ro', isa => 'Num', default => 98 );
has 'dont_delete_files' => ( is => 'ro', isa => 'Bool', default => 0 );
has 'dont_create_rplots' => ( is => 'rw', isa => 'Bool', default => 0 );
has 'dont_split_groups' => ( is => 'ro', isa => 'Bool', default => 0 );
has 'verbose_stats' => ( is => 'rw', isa => 'Bool', default => 0 );
has 'translation_table' => ( is => 'rw', isa => 'Int', default => 11 );
has 'group_limit' => ( is => 'rw', isa => 'Num', default => 50000 );
has 'core_definition' => ( is => 'rw', isa => 'Num', default => 1.0 );

has 'output_multifasta_files' => ( is => 'ro', isa => 'Bool', default => 0 );

Expand Down Expand Up @@ -119,9 +121,11 @@ sub run {
output_multifasta_files => $self->output_multifasta_files,
dont_delete_files => $self->dont_delete_files,
dont_create_rplots => $self->dont_create_rplots,
dont_split_groups => $self->dont_split_groups,
verbose_stats => $self->verbose_stats,
translation_table => $self->translation_table,
group_limit => $self->group_limit,
core_definition => $self->core_definition,
);
$post_analysis->run();

Expand Down
2 changes: 1 addition & 1 deletion lib/Bio/PanGenome/AnalyseGroups.pm
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ sub BUILD {
my ($self) = @_;
# This triggers _genes_to_groups to be built
$self->_groups_to_genes;
# This triggers _genes_to_file to be buit
# This triggers _genes_to_file to be built
$self->_files_to_genes;
$self->_freq_groups_per_genome;
}
Expand Down
6 changes: 4 additions & 2 deletions lib/Bio/PanGenome/AnnotateGroups.pm
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ Take in a group file and associated GFF files for the isolates and update the gr
use Moose;
use Bio::PanGenome::Exceptions;
use Bio::PanGenome::GeneNamesFromGFF;
use Data::Dumper;
use Array::Utils qw(array_minus);

use File::Grep qw(fgrep);

Expand Down Expand Up @@ -174,6 +176,7 @@ sub _builder__groups_to_id_names {
$groups_to_id_names{$group_name} = \@elements;
}
}

return \%groups_to_id_names;
}

Expand Down Expand Up @@ -259,8 +262,7 @@ sub _split_groups {
sub _remove_ids_from_group {
my ( $self, $ids_to_remove, $group ) = @_;

my @remaining_ids =
grep { not $_ ~~ @{$ids_to_remove} } @{ $self->_groups_to_id_names->{$group} };
my @remaining_ids = array_minus( @{ $self->_groups_to_id_names->{$group} }, @{ $ids_to_remove } );
$self->_groups_to_id_names->{$group} = \@remaining_ids;
if ( @{ $self->_groups_to_id_names->{$group} } == 0 ) {
delete( $self->_groups_to_id_names->{$group} );
Expand Down
Loading

0 comments on commit bc683d3

Please sign in to comment.