Skip to content

Commit

Permalink
Merge pull request #10 from andrewjpage/master
Browse files Browse the repository at this point in the history
tests for create pan genome script
  • Loading branch information
andrewjpage committed Apr 30, 2013
2 parents 170b563 + 52a127d commit 80f7b78
Show file tree
Hide file tree
Showing 8 changed files with 155 additions and 8 deletions.
10 changes: 10 additions & 0 deletions lib/Bio/PanGenome.pm
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,12 @@ use Bio::PanGenome::InflateClusters;
use Bio::PanGenome::AnalyseGroups;
use Bio::PanGenome::GroupLabels;
use Bio::PanGenome::AnnotateGroups;
use Bio::PanGenome::Output::OneGenePerGroupFasta;

has 'fasta_files' => ( is => 'rw', isa => 'ArrayRef', required => 1 );
has 'input_files' => ( is => 'rw', isa => 'ArrayRef', required => 1 );
has 'output_filename' => ( is => 'rw', isa => 'Str', default => 'clustered_proteins' );
has 'output_pan_geneome_filename' => ( is => 'rw', isa => 'Str', default => 'pan_genome.fa' );
has 'job_runner' => ( is => 'rw', isa => 'Str', default => 'LSF' );
has 'makeblastdb_exec' => ( is => 'rw', isa => 'Str', default => 'makeblastdb' );
has 'blastp_exec' => ( is => 'rw', isa => 'Str', default => 'blastp' );
Expand Down Expand Up @@ -92,13 +94,21 @@ sub run {
groups_filename => $output_group_labels_filename,
);
$annotate_groups->reannotate;

my $one_gene_per_fasta = Bio::PanGenome::Output::OneGenePerGroupFasta->new(
analyse_groups => $analyse_groups_obj,
output_filename => $self->output_pan_geneome_filename
);
$one_gene_per_fasta->create_file();

unlink($output_blast_results_filename);
unlink($output_combined_filename);
unlink($output_cd_hit_filename );
unlink($output_mcl_filename );
unlink($output_inflate_clusters_filename);
unlink($output_group_labels_filename);
unlink($output_cd_hit_filename.'.clstr');
unlink($output_cd_hit_filename.'.bak.clstr');
}

no Moose;
Expand Down
8 changes: 4 additions & 4 deletions lib/Bio/PanGenome/CommandLine/CreatePanGenome.pm
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ has 'output_filename' => ( is => 'rw', isa => 'Str', default => 'clustered_pro
has 'job_runner' => ( is => 'rw', isa => 'Str', default => 'LSF' );
has 'makeblastdb_exec' => ( is => 'rw', isa => 'Str', default => 'makeblastdb' );
has 'blastp_exec' => ( is => 'rw', isa => 'Str', default => 'blastp' );
has 'mcxdeblast_exec' => ( is => 'ro', isa => 'Str', default => 'mcxdeblast' );
has 'mcl_exec' => ( is => 'ro', isa => 'Str', default => 'mcl' );
has 'mcxdeblast_exec' => ( is => 'rw', isa => 'Str', default => 'mcxdeblast' );
has 'mcl_exec' => ( is => 'rw', isa => 'Str', default => 'mcl' );

has '_error_message' => ( is => 'rw', isa => 'Str' );

Expand All @@ -39,8 +39,8 @@ sub BUILD {
'j|job_runner=s' => \$job_runner,
'm|makeblastdb_exec=s' => \$makeblastdb_exec,
'b|blastp_exec=s' => \$blastp_exec,
'd|mcxdeblast_exec' => \$mcxdeblast_exec,
'c|mcl_exec' => \$mcl_exec,
'd|mcxdeblast_exec=s' => \$mcxdeblast_exec,
'c|mcl_exec=s' => \$mcl_exec,
'h|help' => \$help,
);

Expand Down
5 changes: 2 additions & 3 deletions lib/Bio/PanGenome/ExtractProteomeFromGFF.pm
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,8 @@ sub _build__working_directory_name {
}

# Builds a GFF parser for one input file.
#
# $filename is appended to the command prefix returned by $self->_awk_filter,
# the resulting command is opened as a read pipe, and that pipe is handed to
# Bio::Tools::GFF (requested with gff_version => 3).
#
# Returns the Bio::Tools::GFF object. Dies if the pipe cannot be opened.
sub _gff_parser {
    my ( $self, $filename ) = @_;

    # Only one pipe is opened: the filtered one. Opening an additional
    # unfiltered "cat" pipe first would just leave a shadowed, unread handle
    # and re-declare the same lexicals in this scope.
    open( my $fh, '-|', $self->_awk_filter . $filename ) or die "Couldnt open GFF file";
    my $gff_parser = Bio::Tools::GFF->new( -fh => $fh, gff_version => 3 );
    return $gff_parser;
}
Expand Down
2 changes: 1 addition & 1 deletion lib/Bio/PanGenome/Output/GroupMultifasta.pm
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ sub _build__genes {

sub _lookup_sequence {
my ( $self, $gene, $filename ) = @_;

return undef if(! defined($filename));
open(my $fh, '-|', 'fasta_grep -f '.$filename. ' '.$gene);
my $fasta_obj = Bio::SeqIO->new( -fh => $fh, -format => 'Fasta' );
while ( my $seq = $fasta_obj->next_seq() ) {
Expand Down
30 changes: 30 additions & 0 deletions t/Bio/PanGenome/CommandLine/CreatePanGenome.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#!/usr/bin/env perl
# Test script for Bio::PanGenome::CommandLine::CreatePanGenome.
#
# The same command line (local job runner, three example protein FASTA files)
# is handed to mock_execute_script_and_check_output once per output file to be
# checked, together with the path of the file it is expected to match.
# NOTE(review): mock_execute_script_and_check_output is presumably provided by
# the TestHelper role in t/lib - confirm.
use Moose;
use Data::Dumper;
use File::Slurp;
use Cwd;

BEGIN { unshift( @INC, './lib' ) }
BEGIN { unshift( @INC, './t/lib' ) }
with 'TestHelper';

BEGIN {
    use Test::Most;
    use_ok('Bio::PanGenome::CommandLine::CreatePanGenome');
}
my $script_name = 'Bio::PanGenome::CommandLine::CreatePanGenome';
my $cwd         = getcwd();

# Command line arguments shared by every check below.
my $script_arguments = ' -j Local t/data/example_1.faa t/data/example_2.faa t/data/example_3.faa';

# One [ generated file, expected file ] pair per check.
my @output_checks = (
    [ 'clustered_proteins', 't/data/expected_clustered_proteins' ],
    [ 'pan_genome.fa',      't/data/expected_create_pan_genome.fa' ],
);

unlink('freq_of_genes.png');

# Each check goes to the helper in its own single-entry hash. Both checks use
# the same argument string, and identical keys inside one hash collapse into a
# single entry, which would silently drop the first check.
for my $output_check (@output_checks) {
    my %scripts_and_expected_files = ( $script_arguments => $output_check );
    mock_execute_script_and_check_output( $script_name, \%scripts_and_expected_files );
}

done_testing();
43 changes: 43 additions & 0 deletions t/data/example_3.faa
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
>3333#10_00016 hypothetical protein
MKVTHSCLEFDSIEGLIDFAREFETGSMIRFLSPIEDNSGNVLVKEEVQVKESTLARLKD
IKGQYTPKFEVKLNKELLEQIQNILAIKIVNQLKVTDMKFLKFMYENTNYNFKGIIRNSL
LSKKTTLTLLKVYNQNLNFFKYISELGLLSLGIVMIPDTMRFRLLRRYAFTAGILMDVPR
IGVDKFTKLPSDDNEKVRVAHKCSDILQKLDLIEFTYGSISNHMPLGMIEDSTSSDKAAP
GENIDETFLDDIISNDGESDSKVDGSREDAIPEKSYDIFQALLTDALKLARYIANVSHNA
VDKDYVMEELVYYIAYNTSKKYFDELLANPLVATFKEFEVNVKRLRKIAEVEMKCVYPPS
AWAYPKPKSSQVLCKNKVWDCPNIVMGWDIHVITAQEAFGWVGTSLPVDNYPKCRLEEEL
DEIMVEPEKPKKK*
>3333#10_00018 putative lipoprotein
MDFSFNLVDNNGTVMRSTFSPIRFFTFSFLWILIDCSSVQKIENFNSVLQEPTFKSLKEE
EAILGGSSDSDYKIRKTGNTIPVFVLSPIQTPEGMDSKLAAFLSDEVRLIWAKVKGKQVR
IQEMSWKNPSQLSQELKRLNVDAVIKTDIREVSGKWVVNQKITDPVKEIVYGSVDGSFQS
PKIEDELQANQAYYLKHGSGVLALDAKSSLVPIWEKSLSSGEIDSILKKSIQGYLSFSAS
SADTEVLFQGEKIGIASFRNYPLPEGLQQIQITRPGQKDISKSLQIRSGQTISIYQEWKE
DRTLGGVRILSFPEALQVALDGLKMGETPFYRSNLTPGAMQLELVRETENGPLVYYEGQL
IVDADKITEIALPYKTDNLISEPEFWKLSGEKGFQAFSGKTLDFQNVSSLPPGWYGVFSA
PFVPENMELEGIIPITAESDSGIVAISFHTSKKTISLEYEKERLSVYSFPSNGNNVGTYK
FKKEDKEDGRPFRIITDVKEGTIRLYLGYSKVLEDRLDVSGVWRISILTRGENFSKRSPL
RNLKIEYKGYK*
>3333#10_00001 nudix hydrolase
MSKHGFFQITQKLFLRKGDELLILRDRKSGLGDLPGGRMNENEFFEDWSLSMQREIEEEL
GSQVQIRVSTKPLFIHKHKVNEGNFPCIIIAYHADYLGGDIILSDEHDYISWEKVQTYEP
SPLFTEYMLDAVNLYLKEYAPLVH*
>3333#10_00003 hypothetical protein
VKTTLSGEIEKLRYEVAVKIVNLQGEVLDLRAEMKINFSEVNSKILKLQFEFEMAKIRKE
LKTEIADLRAETKTDFLELQKSIVDIYKTISTQTRWILGVATLFAAIGKVIN*
>3333#10_00005 imidazoleglycerol-phosphate dehydratase or histidinol-phosphatase
LTDKLIGFYDPVRMKAERKTSETEIKLEMNLRGTGQYQFDTEIPFFEHMLSHISKHGLID
LNLWLRGDIEIDCHHSVEDTAILMGATIHKQLGDKAGIFRYGHFTLTMDEVLTTVAVDLG
GRYFFKYTGPELTGKFGIYDAELSLEFLQKLALNAKMNLHVVVHYGDNKHHVHESIFKAL
GKALRMAIAQDSAAAGAIPSTKGVLE*
>3333#10_00006 imidazole glycerol phosphate synthase subunit HisH
MIAILDYGMGNIHSCLKAVSLYTKDFVFTKDHSTIENSKALILPGDGHFDKAMENLNSTG
LRKTIDKHVTSGKPLFGICIGFQILFESSEEIAQGSKKEQIEGLGYIKGKIKKFHGKDFK
VPHIGWNRLQIRRKDKSVLLKGIGDQSFFYFIHSYRPTDAEGNAITGLCDYYQEKFPAVV
EKNNIFGTQFHPEKSHTHGLKLLENFIRFI*
>3333#10_00007 1-(5-phosphoribosyl)-5-[(5- phosphoribosylamino)methylideneamino] imidazole-4-carboxamide isomerase
MIVIPAIDLFDNCAVRLFKGNYEEKKIYSSEPWKLAESFAKNGATLLHLVDLNGARNQLG
VNEDSILKIRETTSLKVQLGGGIRDKEKLAYYDKIGINRFILGTAAVTNPDLLKYALDNY
GKERVVVAVDARDGIVKIAGWEKDSGIHYRDLLERLVKAGIEHIVFTDIAQDGTLAGPNL
EAYREILNSYPFQVIASGGIASLKDLMDLSSLKTKISLYGVITGKALYEGKLDLAKAISS
I*
9 changes: 9 additions & 0 deletions t/data/expected_clustered_proteins
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
group_5: 1234#10_00006 3333#10_00006
group_1: 1234#10_00001 3333#10_00001
group_6: 1234#10_00007 3333#10_00007
group_9: 1234#10_00018 3333#10_00018
group_3: 1234#10_00003 3333#10_00003
group_7: 1234#10_00016 3333#10_00016
group_4: 1234#10_00005 3333#10_00005
group_8: 1234#10_00017
group_2: 1234#10_00002
56 changes: 56 additions & 0 deletions t/data/expected_create_pan_genome.fa
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
>1234#10_00001 nudix hydrolase
MSKHGFFQITQKLFLRKGDELLILRDRKSGLGDLPGGRMNENEFFEDWSLSMQREIEEEL
GSQVQIRVSTKPLFIHKHKVNEGNFPCIIIAYHADYLGGDIILSDEHDYISWEKVQTYEP
SPLFTEYMLDAVNLYLKEYAPLVH*
>1234#10_00002 intramembrane serine protease
MIIKLKIILNSYLIYYFLRGQNTLIRTLLFEFPLTTFFVFLMVATFFIVNVFLPEHLIRQ
YFLNHPGQIQPLSWIGAVFYHGNLIHLFGNMFYLFFLGRAVEYKAGKGRWLLFFFMAALI
SSLLDSFIRGVILHDPTPVVGASGAISGIAAVAALLSPFSLRFNQRNIPFPVFLVAWIMV
YSDITNVFTEDGVARWAHLGGFISVIFAAYFLKPTERKQLHSGFILNLIFIILTLILAFF
YSNRS*
>1234#10_00003 hypothetical protein
VKTTLSGEIEKLRYEVAVKIVNLQGEVLDLRAEMKINFSEVNSKILKLQFEFEMAKIRKE
LKTEIADLRAETKTDFLELQKSIVDIYKTISTQTRWILGVATLFAAIGKVIN*
>1234#10_00005 imidazoleglycerol-phosphate dehydratase or histidinol-phosphatase
LTDKLIGFYDPVRMKAERKTSETEIKLEMNLRGTGQYQFDTEIPFFEHMLSHISKHGLID
LNLWLRGDIEIDCHHSVEDTAILMGATIHKQLGDKAGIFRYGHFTLTMDEVLTTVAVDLG
GRYFFKYTGPELTGKFGIYDAELSLEFLQKLALNAKMNLHVVVHYGDNKHHVHESIFKAL
GKALRMAIAQDSAAAGAIPSTKGVLE*
>1234#10_00006 imidazole glycerol phosphate synthase subunit HisH
MIAILDYGMGNIHSCLKAVSLYTKDFVFTKDHSTIENSKALILPGDGHFDKAMENLNSTG
LRKTIDKHVTSGKPLFGICIGFQILFESSEEIAQGSKKEQIEGLGYIKGKIKKFHGKDFK
VPHIGWNRLQIRRKDKSVLLKGIGDQSFFYFIHSYRPTDAEGNAITGLCDYYQEKFPAVV
EKNNIFGTQFHPEKSHTHGLKLLENFIRFI*
>1234#10_00007 1-(5-phosphoribosyl)-5-[(5- phosphoribosylamino)methylideneamino] imidazole-4-carboxamide isomerase
MIVIPAIDLFDNCAVRLFKGNYEEKKIYSSEPWKLAESFAKNGATLLHLVDLNGARNQLG
VNEDSILKIRETTSLKVQLGGGIRDKEKLAYYDKIGINRFILGTAAVTNPDLLKYALDNY
GKERVVVAVDARDGIVKIAGWEKDSGIHYRDLLERLVKAGIEHIVFTDIAQDGTLAGPNL
EAYREILNSYPFQVIASGGIASLKDLMDLSSLKTKISLYGVITGKALYEGKLDLAKAISS
I*
>1234#10_00016 hypothetical protein
MKVTHSCLEFDSIEGLIDFAREFETGSMIRFLSPIEDNSGNVLVKEEVQVKESTLARLKD
IKGQYTPKFEVKLNKELLEQIQNILAIKIVNQLKVTDMKFLKFMYENTNYNFKGIIRNSL
LSKKTTLTLLKVYNQNLNFFKYISELGLLSLGIVMIPDTMRFRLLRRYAFTAGILMDVPR
IGVDKFTKLPSDDNEKVRVAHKCSDILQKLDLIEFTYGSISNHMPLGMIEDSTSSDKAAP
GENIDETFLDDIISNDGESDSKVDGSREDAIPEKSYDIFQALLTDALKLARYIANVSHNA
VDKDYVMEELVYYIAYNTSKKYFDELLANPLVATFKEFEVNVKRLRKIAEVEMKCVYPPS
AWAYPKPKSSQVLCKNKVWDCPNIVMGWDIHVITAQEAFGWVGTSLPVDNYPKCRLEEEL
DEIMVEPEKPKKK*
>1234#10_00017 LipL45-like lipoprotein
MKRYLSIVILCTFAMLLLVCSTNKSSGSDQVKTESNATSARIVWLLGDVKILSDSGEKKA
ELGASLSSTDRVVTGPNGGAEIMVADSGIIKMSKNSDIEISSLMNPNGSDTNVQVNYGKI
VTMVKKGQKTTEFTVSTPTALAGVRGTSFLTSVESPEGSKINCAKANCTVRFAVIEGTIA
VSKKGESSEVILSKNRELRIEKNQKLTDKLIRSLQNDSLSEMKELIVLHKNETFEYGKLV
EELKSSSEELKILSQSGSVEEVKAAFQKREADRNNADEITKTAKAVNETKYVQQDVQKEK
LKLNPKETF*
>1234#10_00018 putative lipoprotein
MDFSFNLVDNNGTVMRSTFSPIRFFTFSFLWILIDCSSVQKIENFNSVLQEPTFKSLKEE
EAILGGSSDSDYKIRKTGNTIPVFVLSPIQTPEGMDSKLAAFLSDEVRLIWAKVKGKQVR
IQEMSWKNPSQLSQELKRLNVDAVIKTDIREVSGKWVVNQKITDPVKEIVYGSVDGSFQS
PKIEDELQANQAYYLKHGSGVLALDAKSSLVPIWEKSLSSGEIDSILKKSIQGYLSFSAS
SADTEVLFQGEKIGIASFRNYPLPEGLQQIQITRPGQKDISKSLQIRSGQTISIYQEWKE
DRTLGGVRILSFPEALQVALDGLKMGETPFYRSNLTPGAMQLELVRETENGPLVYYEGQL
IVDADKITEIALPYKTDNLISEPEFWKLSGEKGFQAFSGKTLDFQNVSSLPPGWYGVFSA
PFVPENMELEGIIPITAESDSGIVAISFHTSKKTISLEYEKERLSVYSFPSNGNNVGTYK
FKKEDKEDGRPFRIITDVKEGTIRLYLGYSKVLEDRLDVSGVWRISILTRGENFSKRSPL
RNLKIEYKGYK*

0 comments on commit 80f7b78

Please sign in to comment.