Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

tests for create pan genome script #10

Merged
merged 1 commit into from
Apr 30, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions lib/Bio/PanGenome.pm
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,12 @@ use Bio::PanGenome::InflateClusters;
use Bio::PanGenome::AnalyseGroups;
use Bio::PanGenome::GroupLabels;
use Bio::PanGenome::AnnotateGroups;
use Bio::PanGenome::Output::OneGenePerGroupFasta;

has 'fasta_files' => ( is => 'rw', isa => 'ArrayRef', required => 1 );
has 'input_files' => ( is => 'rw', isa => 'ArrayRef', required => 1 );
has 'output_filename' => ( is => 'rw', isa => 'Str', default => 'clustered_proteins' );
has 'output_pan_geneome_filename' => ( is => 'rw', isa => 'Str', default => 'pan_genome.fa' );
has 'job_runner' => ( is => 'rw', isa => 'Str', default => 'LSF' );
has 'makeblastdb_exec' => ( is => 'rw', isa => 'Str', default => 'makeblastdb' );
has 'blastp_exec' => ( is => 'rw', isa => 'Str', default => 'blastp' );
Expand Down Expand Up @@ -92,13 +94,21 @@ sub run {
groups_filename => $output_group_labels_filename,
);
$annotate_groups->reannotate;

my $one_gene_per_fasta = Bio::PanGenome::Output::OneGenePerGroupFasta->new(
analyse_groups => $analyse_groups_obj,
output_filename => $self->output_pan_geneome_filename
);
$one_gene_per_fasta->create_file();

unlink($output_blast_results_filename);
unlink($output_combined_filename);
unlink($output_cd_hit_filename );
unlink($output_mcl_filename );
unlink($output_inflate_clusters_filename);
unlink($output_group_labels_filename);
unlink($output_cd_hit_filename.'.clstr');
unlink($output_cd_hit_filename.'.bak.clstr');
}

no Moose;
Expand Down
8 changes: 4 additions & 4 deletions lib/Bio/PanGenome/CommandLine/CreatePanGenome.pm
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ has 'output_filename' => ( is => 'rw', isa => 'Str', default => 'clustered_pro
has 'job_runner' => ( is => 'rw', isa => 'Str', default => 'LSF' );
has 'makeblastdb_exec' => ( is => 'rw', isa => 'Str', default => 'makeblastdb' );
has 'blastp_exec' => ( is => 'rw', isa => 'Str', default => 'blastp' );
has 'mcxdeblast_exec' => ( is => 'ro', isa => 'Str', default => 'mcxdeblast' );
has 'mcl_exec' => ( is => 'ro', isa => 'Str', default => 'mcl' );
has 'mcxdeblast_exec' => ( is => 'rw', isa => 'Str', default => 'mcxdeblast' );
has 'mcl_exec' => ( is => 'rw', isa => 'Str', default => 'mcl' );

has '_error_message' => ( is => 'rw', isa => 'Str' );

Expand All @@ -39,8 +39,8 @@ sub BUILD {
'j|job_runner=s' => \$job_runner,
'm|makeblastdb_exec=s' => \$makeblastdb_exec,
'b|blastp_exec=s' => \$blastp_exec,
'd|mcxdeblast_exec' => \$mcxdeblast_exec,
'c|mcl_exec' => \$mcl_exec,
'd|mcxdeblast_exec=s' => \$mcxdeblast_exec,
'c|mcl_exec=s' => \$mcl_exec,
'h|help' => \$help,
);

Expand Down
5 changes: 2 additions & 3 deletions lib/Bio/PanGenome/ExtractProteomeFromGFF.pm
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,8 @@ sub _build__working_directory_name {
}

# Builds a GFF parser that reads the given file through a piped command.
#   $filename - path of the GFF file; it is concatenated onto the command
#               string as-is (no quoting is applied here).
# Returns the Bio::Tools::GFF object created with gff_version => 3.
# Dies with "Couldnt open GFF file" if the piped open() fails.
# NOTE(review): this span shows both the removed and the added lines of the
# change (3 deletions, 2 additions), which is why the argument unpacking and
# the open() call each appear twice.
sub _gff_parser {
my ( $self, $filename ) = @_;
# Earlier variant: the awk filter was disabled and the file was streamed with plain `cat`.
# $self->_awk_filter .
open( my $fh, '-|', "cat " . $filename ) or die "Couldnt open GFF file";
# Later variant: the command prefix comes from $self->_awk_filter
# (presumably an awk invocation ending in a space - confirm against its builder).
my ( $self, $filename ) = @_;
open( my $fh, '-|', $self->_awk_filter . $filename ) or die "Couldnt open GFF file";
# The parser consumes the pipe's read handle directly.
my $gff_parser = Bio::Tools::GFF->new( -fh => $fh, gff_version => 3 );
return $gff_parser;
}
Expand Down
2 changes: 1 addition & 1 deletion lib/Bio/PanGenome/Output/GroupMultifasta.pm
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ sub _build__genes {

sub _lookup_sequence {
my ( $self, $gene, $filename ) = @_;

return undef if(! defined($filename));
open(my $fh, '-|', 'fasta_grep -f '.$filename. ' '.$gene);
my $fasta_obj = Bio::SeqIO->new( -fh => $fh, -format => 'Fasta' );
while ( my $seq = $fasta_obj->next_seq() ) {
Expand Down
30 changes: 30 additions & 0 deletions t/Bio/PanGenome/CommandLine/CreatePanGenome.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#!/usr/bin/env perl
# Exercises Bio::PanGenome::CommandLine::CreatePanGenome on three example
# protein FASTA files and hands each generated output file, together with its
# expected counterpart under t/data, to the TestHelper comparison routine.
use Moose;
use Data::Dumper;
use File::Slurp;
use Cwd;

BEGIN { unshift( @INC, './lib' ) }
BEGIN { unshift( @INC, './t/lib' ) }
with 'TestHelper';

BEGIN {
    use Test::Most;
    use_ok('Bio::PanGenome::CommandLine::CreatePanGenome');
}
my $script_name = 'Bio::PanGenome::CommandLine::CreatePanGenome';

# Input files shared by every invocation below.
my $input_files = 't/data/example_1.faa t/data/example_2.faa t/data/example_3.faa';

# Maps a command line to [ generated file, expected file ].
# The keys MUST differ: when two identical keys are given to a hash, the later
# pair silently replaces the earlier one, which previously dropped the
# 'clustered_proteins' comparison. The second entry therefore uses the long
# option name, which the command-line class declares as an alias of -j
# ('j|job_runner=s').
# NOTE(review): presumably the helper runs the script once per key - confirm
# against TestHelper.
my %scripts_and_expected_files = (
    " -j Local $input_files" =>
      [ 'clustered_proteins', 't/data/expected_clustered_proteins' ],
    " --job_runner Local $input_files" =>
      [ 'pan_genome.fa', 't/data/expected_create_pan_genome.fa' ],
);

unlink('freq_of_genes.png');

mock_execute_script_and_check_output( $script_name, \%scripts_and_expected_files );

done_testing();
43 changes: 43 additions & 0 deletions t/data/example_3.faa
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
>3333#10_00016 hypothetical protein
MKVTHSCLEFDSIEGLIDFAREFETGSMIRFLSPIEDNSGNVLVKEEVQVKESTLARLKD
IKGQYTPKFEVKLNKELLEQIQNILAIKIVNQLKVTDMKFLKFMYENTNYNFKGIIRNSL
LSKKTTLTLLKVYNQNLNFFKYISELGLLSLGIVMIPDTMRFRLLRRYAFTAGILMDVPR
IGVDKFTKLPSDDNEKVRVAHKCSDILQKLDLIEFTYGSISNHMPLGMIEDSTSSDKAAP
GENIDETFLDDIISNDGESDSKVDGSREDAIPEKSYDIFQALLTDALKLARYIANVSHNA
VDKDYVMEELVYYIAYNTSKKYFDELLANPLVATFKEFEVNVKRLRKIAEVEMKCVYPPS
AWAYPKPKSSQVLCKNKVWDCPNIVMGWDIHVITAQEAFGWVGTSLPVDNYPKCRLEEEL
DEIMVEPEKPKKK*
>3333#10_00018 putative lipoprotein
MDFSFNLVDNNGTVMRSTFSPIRFFTFSFLWILIDCSSVQKIENFNSVLQEPTFKSLKEE
EAILGGSSDSDYKIRKTGNTIPVFVLSPIQTPEGMDSKLAAFLSDEVRLIWAKVKGKQVR
IQEMSWKNPSQLSQELKRLNVDAVIKTDIREVSGKWVVNQKITDPVKEIVYGSVDGSFQS
PKIEDELQANQAYYLKHGSGVLALDAKSSLVPIWEKSLSSGEIDSILKKSIQGYLSFSAS
SADTEVLFQGEKIGIASFRNYPLPEGLQQIQITRPGQKDISKSLQIRSGQTISIYQEWKE
DRTLGGVRILSFPEALQVALDGLKMGETPFYRSNLTPGAMQLELVRETENGPLVYYEGQL
IVDADKITEIALPYKTDNLISEPEFWKLSGEKGFQAFSGKTLDFQNVSSLPPGWYGVFSA
PFVPENMELEGIIPITAESDSGIVAISFHTSKKTISLEYEKERLSVYSFPSNGNNVGTYK
FKKEDKEDGRPFRIITDVKEGTIRLYLGYSKVLEDRLDVSGVWRISILTRGENFSKRSPL
RNLKIEYKGYK*
>3333#10_00001 nudix hydrolase
MSKHGFFQITQKLFLRKGDELLILRDRKSGLGDLPGGRMNENEFFEDWSLSMQREIEEEL
GSQVQIRVSTKPLFIHKHKVNEGNFPCIIIAYHADYLGGDIILSDEHDYISWEKVQTYEP
SPLFTEYMLDAVNLYLKEYAPLVH*
>3333#10_00003 hypothetical protein
VKTTLSGEIEKLRYEVAVKIVNLQGEVLDLRAEMKINFSEVNSKILKLQFEFEMAKIRKE
LKTEIADLRAETKTDFLELQKSIVDIYKTISTQTRWILGVATLFAAIGKVIN*
>3333#10_00005 imidazoleglycerol-phosphate dehydratase or histidinol-phosphatase
LTDKLIGFYDPVRMKAERKTSETEIKLEMNLRGTGQYQFDTEIPFFEHMLSHISKHGLID
LNLWLRGDIEIDCHHSVEDTAILMGATIHKQLGDKAGIFRYGHFTLTMDEVLTTVAVDLG
GRYFFKYTGPELTGKFGIYDAELSLEFLQKLALNAKMNLHVVVHYGDNKHHVHESIFKAL
GKALRMAIAQDSAAAGAIPSTKGVLE*
>3333#10_00006 imidazole glycerol phosphate synthase subunit HisH
MIAILDYGMGNIHSCLKAVSLYTKDFVFTKDHSTIENSKALILPGDGHFDKAMENLNSTG
LRKTIDKHVTSGKPLFGICIGFQILFESSEEIAQGSKKEQIEGLGYIKGKIKKFHGKDFK
VPHIGWNRLQIRRKDKSVLLKGIGDQSFFYFIHSYRPTDAEGNAITGLCDYYQEKFPAVV
EKNNIFGTQFHPEKSHTHGLKLLENFIRFI*
>3333#10_00007 1-(5-phosphoribosyl)-5-[(5- phosphoribosylamino)methylideneamino] imidazole-4-carboxamide isomerase
MIVIPAIDLFDNCAVRLFKGNYEEKKIYSSEPWKLAESFAKNGATLLHLVDLNGARNQLG
VNEDSILKIRETTSLKVQLGGGIRDKEKLAYYDKIGINRFILGTAAVTNPDLLKYALDNY
GKERVVVAVDARDGIVKIAGWEKDSGIHYRDLLERLVKAGIEHIVFTDIAQDGTLAGPNL
EAYREILNSYPFQVIASGGIASLKDLMDLSSLKTKISLYGVITGKALYEGKLDLAKAISS
I*
9 changes: 9 additions & 0 deletions t/data/expected_clustered_proteins
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
group_5: 1234#10_00006 3333#10_00006
group_1: 1234#10_00001 3333#10_00001
group_6: 1234#10_00007 3333#10_00007
group_9: 1234#10_00018 3333#10_00018
group_3: 1234#10_00003 3333#10_00003
group_7: 1234#10_00016 3333#10_00016
group_4: 1234#10_00005 3333#10_00005
group_8: 1234#10_00017
group_2: 1234#10_00002
56 changes: 56 additions & 0 deletions t/data/expected_create_pan_genome.fa
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
>1234#10_00001 nudix hydrolase
MSKHGFFQITQKLFLRKGDELLILRDRKSGLGDLPGGRMNENEFFEDWSLSMQREIEEEL
GSQVQIRVSTKPLFIHKHKVNEGNFPCIIIAYHADYLGGDIILSDEHDYISWEKVQTYEP
SPLFTEYMLDAVNLYLKEYAPLVH*
>1234#10_00002 intramembrane serine protease
MIIKLKIILNSYLIYYFLRGQNTLIRTLLFEFPLTTFFVFLMVATFFIVNVFLPEHLIRQ
YFLNHPGQIQPLSWIGAVFYHGNLIHLFGNMFYLFFLGRAVEYKAGKGRWLLFFFMAALI
SSLLDSFIRGVILHDPTPVVGASGAISGIAAVAALLSPFSLRFNQRNIPFPVFLVAWIMV
YSDITNVFTEDGVARWAHLGGFISVIFAAYFLKPTERKQLHSGFILNLIFIILTLILAFF
YSNRS*
>1234#10_00003 hypothetical protein
VKTTLSGEIEKLRYEVAVKIVNLQGEVLDLRAEMKINFSEVNSKILKLQFEFEMAKIRKE
LKTEIADLRAETKTDFLELQKSIVDIYKTISTQTRWILGVATLFAAIGKVIN*
>1234#10_00005 imidazoleglycerol-phosphate dehydratase or histidinol-phosphatase
LTDKLIGFYDPVRMKAERKTSETEIKLEMNLRGTGQYQFDTEIPFFEHMLSHISKHGLID
LNLWLRGDIEIDCHHSVEDTAILMGATIHKQLGDKAGIFRYGHFTLTMDEVLTTVAVDLG
GRYFFKYTGPELTGKFGIYDAELSLEFLQKLALNAKMNLHVVVHYGDNKHHVHESIFKAL
GKALRMAIAQDSAAAGAIPSTKGVLE*
>1234#10_00006 imidazole glycerol phosphate synthase subunit HisH
MIAILDYGMGNIHSCLKAVSLYTKDFVFTKDHSTIENSKALILPGDGHFDKAMENLNSTG
LRKTIDKHVTSGKPLFGICIGFQILFESSEEIAQGSKKEQIEGLGYIKGKIKKFHGKDFK
VPHIGWNRLQIRRKDKSVLLKGIGDQSFFYFIHSYRPTDAEGNAITGLCDYYQEKFPAVV
EKNNIFGTQFHPEKSHTHGLKLLENFIRFI*
>1234#10_00007 1-(5-phosphoribosyl)-5-[(5- phosphoribosylamino)methylideneamino] imidazole-4-carboxamide isomerase
MIVIPAIDLFDNCAVRLFKGNYEEKKIYSSEPWKLAESFAKNGATLLHLVDLNGARNQLG
VNEDSILKIRETTSLKVQLGGGIRDKEKLAYYDKIGINRFILGTAAVTNPDLLKYALDNY
GKERVVVAVDARDGIVKIAGWEKDSGIHYRDLLERLVKAGIEHIVFTDIAQDGTLAGPNL
EAYREILNSYPFQVIASGGIASLKDLMDLSSLKTKISLYGVITGKALYEGKLDLAKAISS
I*
>1234#10_00016 hypothetical protein
MKVTHSCLEFDSIEGLIDFAREFETGSMIRFLSPIEDNSGNVLVKEEVQVKESTLARLKD
IKGQYTPKFEVKLNKELLEQIQNILAIKIVNQLKVTDMKFLKFMYENTNYNFKGIIRNSL
LSKKTTLTLLKVYNQNLNFFKYISELGLLSLGIVMIPDTMRFRLLRRYAFTAGILMDVPR
IGVDKFTKLPSDDNEKVRVAHKCSDILQKLDLIEFTYGSISNHMPLGMIEDSTSSDKAAP
GENIDETFLDDIISNDGESDSKVDGSREDAIPEKSYDIFQALLTDALKLARYIANVSHNA
VDKDYVMEELVYYIAYNTSKKYFDELLANPLVATFKEFEVNVKRLRKIAEVEMKCVYPPS
AWAYPKPKSSQVLCKNKVWDCPNIVMGWDIHVITAQEAFGWVGTSLPVDNYPKCRLEEEL
DEIMVEPEKPKKK*
>1234#10_00017 LipL45-like lipoprotein
MKRYLSIVILCTFAMLLLVCSTNKSSGSDQVKTESNATSARIVWLLGDVKILSDSGEKKA
ELGASLSSTDRVVTGPNGGAEIMVADSGIIKMSKNSDIEISSLMNPNGSDTNVQVNYGKI
VTMVKKGQKTTEFTVSTPTALAGVRGTSFLTSVESPEGSKINCAKANCTVRFAVIEGTIA
VSKKGESSEVILSKNRELRIEKNQKLTDKLIRSLQNDSLSEMKELIVLHKNETFEYGKLV
EELKSSSEELKILSQSGSVEEVKAAFQKREADRNNADEITKTAKAVNETKYVQQDVQKEK
LKLNPKETF*
>1234#10_00018 putative lipoprotein
MDFSFNLVDNNGTVMRSTFSPIRFFTFSFLWILIDCSSVQKIENFNSVLQEPTFKSLKEE
EAILGGSSDSDYKIRKTGNTIPVFVLSPIQTPEGMDSKLAAFLSDEVRLIWAKVKGKQVR
IQEMSWKNPSQLSQELKRLNVDAVIKTDIREVSGKWVVNQKITDPVKEIVYGSVDGSFQS
PKIEDELQANQAYYLKHGSGVLALDAKSSLVPIWEKSLSSGEIDSILKKSIQGYLSFSAS
SADTEVLFQGEKIGIASFRNYPLPEGLQQIQITRPGQKDISKSLQIRSGQTISIYQEWKE
DRTLGGVRILSFPEALQVALDGLKMGETPFYRSNLTPGAMQLELVRETENGPLVYYEGQL
IVDADKITEIALPYKTDNLISEPEFWKLSGEKGFQAFSGKTLDFQNVSSLPPGWYGVFSA
PFVPENMELEGIIPITAESDSGIVAISFHTSKKTISLEYEKERLSVYSFPSNGNNVGTYK
FKKEDKEDGRPFRIITDVKEGTIRLYLGYSKVLEDRLDVSGVWRISILTRGENFSKRSPL
RNLKIEYKGYK*