Skip to content

Commit

Permalink
Terry's updates to microbedb involving log4perl and other updates
Browse files Browse the repository at this point in the history
  • Loading branch information
Matthew Laird committed May 10, 2012
1 parent a36e0d1 commit 0d4e824
Show file tree
Hide file tree
Showing 18 changed files with 248 additions and 37 deletions.
21 changes: 19 additions & 2 deletions GenomeProject.pm
Expand Up @@ -32,9 +32,12 @@ use base ("MicrobeDB::MicrobeDB");
use Carp;

require MicrobeDB::Replicon;

require MicrobeDB::Search;

use Log::Log4perl qw(get_logger :nowarn);
my $logger = Log::Log4perl->get_logger();


my @FIELDS;
my @_db_fields;
my %_field_hash;
Expand Down Expand Up @@ -154,6 +157,14 @@ sub new {
$arg{$attr}->[$i] = new MicrobeDB::Replicon( %{ $arg{$attr}->[$i] } );
}
}
} elsif ($attr eq 'genome_size') {
# We have to test genome_size and genome_gc to see if they're
# null, otherwise when it tries to retrieve the replicons
# to calculate it and gpv_id isn't yet set bad things happen.
# (infinite loop)
next unless$arg{$attr};
} elsif ($attr eq 'genome_gc') {
next unless$arg{$attr};
}

#do the same for references
Expand Down Expand Up @@ -185,7 +196,7 @@ sub add_replicon {
} elsif ( ref($rep) eq 'HASH' ) {
$rep_obj = new MicrobeDB::Replicon(%$rep);
} else {
croak "Only a MicrobeDB::Replicon object or hash can be used to add a Replicon";
$logger->logcroak("Only a MicrobeDB::Replicon object or hash can be used to add a Replicon");
}
push( @{ $self->{replicons} }, $rep_obj );
}
Expand All @@ -212,6 +223,12 @@ sub next_replicon {
#retrieves all replicons for this genome project
sub _retrieve_replicons{
my ($self) =@_;

# In case a genome didn't load properly and we don't
# get back a proper object, we DON'T want to search
# with no gpv_id, ugh, not good.
return () unless( $self->{gpv_id});

my $rep = new MicrobeDB::Replicon(gpv_id => $self->gpv_id());
my $so = new MicrobeDB::Search();
my @reps = $so->object_search($rep);
Expand Down
83 changes: 83 additions & 0 deletions Iterator.pm
@@ -0,0 +1,83 @@
# Copyright (C) Matthew R. Laird
# Author lairdm@sfu.ca

# This file is part of MicrobeDB

#MicrobeDB is free software: you can redistribute it and/or modify
#it under the terms of the GNU General Public License as published by
#the Free Software Foundation, either version 3 of the License, or
#(at your option) any later version.

#MicrobeDB is distributed in the hope that it will be useful,
#but WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#GNU General Public License for more details.

#You should have received a copy of the GNU General Public License
#along with MicrobeDB. If not, see <http://www.gnu.org/licenses/>.

package MicrobeDB::Iterator;

# This class contains an iterator object for use with
# MicrobeDB::Search. Some search results are too large
# to return as a single object so the iterator class
# gives access to the results row-by-row.

#inherit common methods and fields from the MicrobeDB class
use base ("MicrobeDB::MicrobeDB");

use strict;
use warnings;
use Carp;

# Names of the attributes a MicrobeDB::Iterator object carries:
# ret_obj is the class (or object) whose new() is called for each row in
# nextRecord(), and dbh_obj is the handle that fetchrow_hashref() and
# rows() are called on.
my @FIELDS;

# The list is filled inside BEGIN so that it already exists at compile
# time, when the "use fields" line below is processed.
BEGIN {
@FIELDS = qw(
ret_obj
dbh_obj
);
}
# Declare the attributes above as the allowed fields of this class.
use fields @FIELDS;

# Constructor. Takes a list of attribute => value pairs and returns a new
# object of $class, with every supplied attribute stored by calling the
# accessor method of the same name.
sub new {
    my ($class, %arg) = @_;

    # fields::new() blesses a hash restricted to the declared fields
    my $self = fields::new($class);

    # hand each supplied value to its accessor
    while ( my ($attr, $value) = each %arg ) {
        $self->$attr($value);
    }

    return $self;
}

# Return the next object from the database

# Return the next result row as an object, or nothing when the result set
# is exhausted.
#
# Calls fetchrow_hashref() on $self->{dbh_obj}; if a row comes back, its
# column/value pairs are passed to $self->{ret_obj}->new() and the
# resulting object is returned.
#
# At end of data a bare "return" is used: undef in scalar context and the
# empty list in list context. This lets both
#     while (my $rec   = $iter->nextRecord) { ... }
#     while (my ($rec) = $iter->nextRecord) { ... }
# terminate; "return undef" would make the second form loop forever.
sub nextRecord {
    my ($self) = @_;

    my $curr_row = $self->{dbh_obj}->fetchrow_hashref
        or return;

    # Calling new() on a class-name string is legal under strict refs,
    # so strictures stay on here.
    my $obj = $self->{ret_obj}->new(%$curr_row);

    return $obj;
}

# Return whatever the rows() method of the stored handle
# ($self->{dbh_obj}) reports.
sub rows {
    my $self = shift;

    my $handle = $self->{dbh_obj};
    return $handle->rows;
}
20 changes: 11 additions & 9 deletions MicrobeDB.pm
Expand Up @@ -34,13 +34,15 @@ BEGIN{
}
use fields @FIELDS;

#PATH Settings
use Log::Log4perl qw(get_logger :nowarn);
my $logger = Log::Log4perl->get_logger();


#MicrobeDB MySQL settings
my $db = 'microbedb';
my $db_config = "$ENV{HOME}/.my.cnf";
die "MySQL config file:$db_config can not be found!" unless -e $db_config;
my $dsn = "DBI:mysql:database=$db;mysql_read_default_file=$db_config";
# Path of the MySQL client config file referenced by the DSN below
# (mysql_read_default_file). The opening quote was missing, which made
# this line a syntax error.
my $db_config = "$ENV{HOME}/.my.cnf";
# Stop right away (and log it) if the config file is not there.
$logger->logdie("MySQL config file: $db_config can not be found!") unless -e $db_config;
my $database=$ENV{"MicrobeDB"}||"microbedb"; # if unable to access .bashrc, use microbedb
my $dsn = "DBI:mysql:database=$database;mysql_read_default_file=$db_config";
#note that these fields are taken from the config file "my.cnf"
my ($user,$pass) = ("","");
Expand Down Expand Up @@ -121,13 +123,13 @@ sub _db_connect {
eval {
#Try to connect to microbeDB
$dbh = DBI->connect( $dsn, $user, $pass, { RaiseError => 1 } )
|| die $DBI::errstr;
|| $logger->logdie($DBI::errstr);
};
#if there is an error or the handle is empty then try again
if($@ || !defined($dbh)){
croak("Failed to connect to microbeDB! $max_tries tries have failed! \n$@") if $try == $max_tries;
warn "Failed to connect to microbeDB! Trying again in 5 seconds. This is attempt $try of $max_tries. \n$@";
$logger->logcroak("Failed to connect to microbeDB! $max_tries tries have failed! $@") if $try == $max_tries;
$logger->logwarn("Failed to connect to microbeDB! Trying again in 5 seconds. This is attempt $try of $max_tries. $@");
#increase wait time by 5 seconds on each failure
sleep(5*$try);
Expand All @@ -136,7 +138,7 @@ sub _db_connect {
}
}
unless(defined($dbh)){
die "Can't connect to db:$!";
$logger->logdie("Can't connect to db: $!");
}
# Save the dbh for later
Expand Down
24 changes: 17 additions & 7 deletions Parse.pm
Expand Up @@ -71,6 +71,13 @@ sub parse_genome{

my $gpo=$self->gpo();
$gpo->gpv_directory($dir);
$logger->debug("Parsing directory: $dir");

# Parse directory name for gp_id
if($dir =~ /.*_uid(\d+)\/?$/) {
$gpo->gp_id($1);
$logger->debug("Found gp_id: $1");
}

my @files = glob($dir.'*');

Expand Down Expand Up @@ -154,7 +161,8 @@ sub parse_gbk {
$rep->rep_seq($seq->seq());
$rep->file_name($file_name);
$rep->file_types($file_types);
$rep->rep_accnum($seq->accession_number());
my $rep_accnum = $seq->accession_number().'.'.$seq->version();
$rep->rep_accnum($rep_accnum);

my $rep_ginum = $seq->primary_id();
unless($rep_ginum =~ /\D/){
Expand Down Expand Up @@ -322,10 +330,11 @@ sub parse_ncbicompgenomefile {
sub parse_ncbiorginfofile {
my($self,$org_info_file)=@_;
my $gpo=$self->gpo();
my $taxon_id=$gpo->taxon_id();
# my $taxon_id=$gpo->taxon_id();
my $gp_id = $gpo->gp_id();

unless($taxon_id){
$logger->warn("No taxon_id so can't look up organism information in $org_info_file");
unless($gp_id){
$logger->warn("No gp_id so can't look up organism information in $org_info_file");
return;
}

Expand All @@ -343,7 +352,8 @@ sub parse_ncbiorginfofile {
}
} elsif (/^\d+\s+\w+/) {
my @entries = split(/\t/);
if ( $entries[2] == $taxon_id ) {
# if ( $entries[2] == $taxon_id ) {
if ( $entries[0] == $gp_id ) {
my $i=0;
foreach (@entries) {
$info_org_parse{ $headings[$i] } = $_;
Expand All @@ -356,7 +366,7 @@ sub parse_ncbiorginfofile {
}
if($found_orginfo){
#map the old code parse hash to the gpo
$gpo->gp_id($info_org_parse{'RefSeq project ID'}) if exists($info_org_parse{'RefSeq project ID'});
# $gpo->gp_id($info_org_parse{'RefSeq project ID'}) if exists($info_org_parse{'RefSeq project ID'});
$gpo->gram_stain($info_org_parse{'Gram Stain'}) if exists($info_org_parse{'Gram Stain'});
$gpo->disease($info_org_parse{'Disease'}) if exists($info_org_parse{'Disease'});
$gpo->pathogenic_in($info_org_parse{'Pathogenic in'}) if exists($info_org_parse{'Pathogenic in'});
Expand Down Expand Up @@ -391,7 +401,7 @@ sub parse_ncbiorginfofile {
}

}else{
$logger->warn("Couldn't find taxon id: $taxon_id within the org_info_file: $org_info_file . Many fields in GenomeProject will not be filled for this organism");
$logger->warn("Couldn't find gp id: $gp_id within the org_info_file: $org_info_file . Many fields in GenomeProject will not be filled for this organism");
}

}
Expand Down
84 changes: 81 additions & 3 deletions Replicon.pm
Expand Up @@ -30,8 +30,12 @@ use Carp;


use MicrobeDB::Gene;
use MicrobeDB::GenomeProject;
require MicrobeDB::Search;

use Log::Log4perl qw(get_logger :nowarn);
my $logger = Log::Log4perl->get_logger();

my @FIELDS;
my @replicon;
my @version;
Expand Down Expand Up @@ -63,8 +67,10 @@ BEGIN {
rna_num
file_types
rep_seq
distance_calculated
);


@version = qw(
version_id
dl_directory
Expand Down Expand Up @@ -100,6 +106,24 @@ my @_other = qw(

use fields @FIELDS;

# Needed for outputting fasta files,
# these are the allowed substitutions in the string
#
# Each key is a token name that may appear as #key# in the header format
# given to write_fasta(). Each value is a string of Perl code, not a
# value: write_fasta() evaluates it with s///ee while its lexicals $gene
# (the gene being written) and $self (this replicon) are in scope.
# Values are single-quoted on purpose so nothing is interpolated here.
my %header_lookup = (
'gene_id' => '$gene->gene_id',
'ref' => '$gene->protein_accnum',
'gi' => '$gene->pid',
'rpv_id' => '$gene->rpv_id',
'gpv_id' => '$gene->gpv_id',
'start' => '$gene->gene_start',
'end' => '$gene->gene_end',
'length' => '$gene->gene_length',
'locus_tag' => '$gene->locus_tag',
'desc' => '$gene->gene_product',
'rep_desc' => '$self->definition',
'rep_accnum' => '$self->rep_accnum',
'rep_type' => '$self->rep_type',
);

sub new {
my ( $class, %arg ) = @_;

Expand Down Expand Up @@ -130,6 +154,7 @@ sub new {
#set the attribute in the object
$self->$attr( $arg{$attr} );
}

return $self;
}

Expand All @@ -138,21 +163,32 @@ sub new {
#undef is returned if that file is not available for that replicon (based on the file_types field)
# Build the path of this replicon's file with the given suffix.
#
#   $file_suffix - extension without the leading dot, matched literally
#                  against the space-separated file_types field.
#
# Returns gpv_directory . file_name . ".$file_suffix", or undef when the
# suffix is not listed in file_types or the owning GenomeProject cannot
# be found (an error is logged in that case). The explicit undef is kept
# because it is the documented "not available" value.
sub get_filename {
    my ($self, $file_suffix) = @_;

    #check to see if the file type is available for this replicon
    #(\Q..\E so a suffix containing regex metacharacters is taken literally)
    my $file_types = $self->file_types;
    unless ( defined($file_types)
        && $file_types =~ /( |^)\.\Q$file_suffix\E( |$)/ )
    {
        return undef;
    }

    my $search_obj = MicrobeDB::Search->new(return_obj => 'MicrobeDB::GenomeProject');

    if (!defined($search_obj)) {
        $logger->error("Genome Project is missing!?");
        # nothing to search with; bail out instead of crashing below
        return undef;
    }

    my ($gpo) = $search_obj->object_search($self);

    if (!defined($gpo)) {
        $logger->error($self->rep_accnum, " is missing in GenomeProject");
        # without a GenomeProject there is no directory to build a path from
        return undef;
    }

    my $file_name = $gpo->gpv_directory() . $self->file_name() . ".$file_suffix";

    #small hack since symbolic links will not work when called by php from web browser
    if ( $file_name !~ /home.westgrid/ && $file_name =~ /home\/shared/ ) {
        $file_name =~ s/home/home.westgrid/;
    }

    return $file_name;
}

Expand All @@ -165,7 +201,7 @@ sub add_gene {
} elsif ( ref($gene) eq 'HASH' ) {
$gene_obj = new MicrobeDB::Gene(%$gene);
} else {
croak "Only a Gene object or hash can be used to add a Gene";
$logger->logcroak("Only a Gene object or hash can be used to add a Gene");
}
push( @{ $self->{genes} }, $gene_obj );
}
Expand Down Expand Up @@ -245,6 +281,48 @@ sub _retrieve_rep_seq{
return $replicon->rep_seq();
}

# Write the sequences of this replicon's genes to a FASTA file.
#
# Named arguments (all optional):
#   filename  - output path; defaults to $self->file_name. Unless the name
#               already ends in a dot followed by three non-space
#               characters, '.faa' (seqtype 'protein') or '.ffn' (any
#               other seqtype) is appended.
#   append    - true to append to the file instead of overwriting it.
#   seqtype   - 'protein' (default) writes protein_seq; any other value
#               writes gene_seq.
#   headerfmt - header template; every #key# token is replaced by
#               evaluating the matching %header_lookup entry. Unknown
#               tokens become the empty string. Defaults to
#               'gi|#gi#|ref|#ref#| #desc# [#rep_desc#]'.
#
# Genes without a sequence of the requested type are skipped. Sequences
# are wrapped at 70 characters per line.
#
# Croaks if the file cannot be opened or closed, or if genes() yields
# something that is not a MicrobeDB::Gene. Returns true on success.
sub write_fasta {
    my ($self, %args) = @_;

    my $outfile   = $args{'filename'}  || $self->file_name;
    my $append    = $args{'append'}    || 0;
    my $seqtype   = $args{'seqtype'}   || 'protein';
    my $headerfmt = $args{'headerfmt'} || 'gi|#gi#|ref|#ref#| #desc# [#rep_desc#]';

    my $want_protein = ($seqtype eq 'protein');

    # If the user already gave the filename an extension
    # don't tack one on
    unless ($outfile =~ /\.\S{3}$/) {
        $outfile .= ($want_protein ? '.faa' : '.ffn');
    }

    # Three-argument open with a lexical handle: the file name can no
    # longer be mistaken for a mode or a pipe, and the handle is closed
    # automatically if we croak part-way through.
    my $mode = $append ? '>>' : '>';
    open(my $out, $mode, $outfile) or
        croak "Error opening fasta file $outfile: $!\n";

    foreach my $gene (@{ $self->genes() || [] }) {
        unless (ref($gene) eq 'MicrobeDB::Gene') {
            croak "Only a Gene object can be returned here, this is a " . ref($gene);
        }

        # Fetch the sequence once; skip genes that do not have one
        my $seq = $want_protein ? $gene->protein_seq : $gene->gene_seq;
        next unless $seq;

        # Evaluate the header format string. Only the code strings stored
        # in %header_lookup are ever eval'd (never the caller's text);
        # a token with no entry evaluates the literal '' instead of undef.
        (my $header = $headerfmt) =~
            s/#(\w+)#/exists $header_lookup{$1} ? $header_lookup{$1} : "''"/gee;

        print {$out} ">$header\n";
        print {$out} join("\n", grep { length } split(/(.{1,70})/, $seq));
        print {$out} "\n";
    }

    # Buffered write errors only show up at close time
    close($out) or
        croak "Error closing fasta file $outfile: $!";

    return 1;
}

sub table_names {
my ( $self, $field_name ) = @_;

Expand Down

0 comments on commit 0d4e824

Please sign in to comment.