Skip to content

Commit

Permalink
Merge branch 'master' of github.com:sim82/papara_nt
Browse files Browse the repository at this point in the history
Conflicts:
	CMakeLists.txt
  • Loading branch information
sim82 committed Jan 29, 2013
2 parents 08ac07c + d241c6d commit a0ac782
Show file tree
Hide file tree
Showing 10 changed files with 208 additions and 23 deletions.
12 changes: 9 additions & 3 deletions CMakeLists.txt
Expand Up @@ -17,7 +17,10 @@ endif()

# this is a hack: only add subdirectories if IVYMIKE_INCLUDE is not already set.
# IVYMIKE_INCLUDE is abused as a flag indicating that papara should be build in 'library-only' mode...
#add_subdirectory(ivy_mike)

if( NOT PAPARA_NO_IVYMIKE )
add_subdirectory(ivy_mike)
endif()
# add_subdirectory(genassign_blast)
# ENDIF()

Expand Down Expand Up @@ -66,14 +69,16 @@ add_executable(papara papara2_main.cpp ${ALL_HEADERS})
add_executable(fasta_random_sample2 fasta_random_sample2.cpp ${ALL_HEADERS})
add_executable(fasta_to_phy fasta_to_phy.cpp ${ALL_HEADERS})
add_executable(phy_to_fasta phy_to_fasta.cpp ${ALL_HEADERS})
add_executable(phy_megamerge phy_megamerge.cpp ${ALL_HEADERS})


target_link_libraries(papara papara_core ivymike ublas_jama ${SYSDEP_LIBS} )
target_link_libraries(phy_to_fasta ivymike ${SYSDEP_LIBS} )
target_link_libraries(phy_megamerge ivymike ${SYSDEP_LIBS} )


#add_executable(phy_cut_partition phy_cut_partition.cpp ${ALL_HEADERS})
#target_link_libraries(phy_cut_partition papara_core ublas_jama ivymike ${SYSDEP_LIBS} )
add_executable(phy_cut_partition phy_cut_partition.cpp ${ALL_HEADERS})
target_link_libraries(phy_cut_partition papara_core ublas_jama ivymike ${SYSDEP_LIBS} )


if( USE_CPP11 )
Expand All @@ -86,3 +91,4 @@ if( USE_CPP11 )
target_link_libraries(dump_anc_probs ivymike ublas_jama ${BOOST_LIBS} ${SYSDEP_LIBS} PocoFoundation)
endif()

install_targets( /bin papara )
2 changes: 1 addition & 1 deletion build_papara2.sh
Expand Up @@ -5,7 +5,7 @@



g++ -o papara -O3 -msse4.1 -I. -I ivy_mike/src/ -I ublasJama-1.0.2.3 papara.cpp pvec.cpp pars_align_seq.cpp pars_align_gapp_seq.cpp parsimony.cpp sequence_model.cpp papara2_main.cpp blast_partassign.cpp align_utils.cpp ivy_mike/src/time.cpp ivy_mike/src/tree_parser.cpp ivy_mike/src/getopt.cpp ivy_mike/src/demangle.cpp ivy_mike/src/multiple_alignment.cpp ublasJama-1.0.2.3/EigenvalueDecomposition.cpp -lpthread
g++ -o papara -O3 -msse4a -I. -I ivy_mike/src/ -I ublasJama-1.0.2.3 papara.cpp pvec.cpp pars_align_seq.cpp pars_align_gapp_seq.cpp parsimony.cpp sequence_model.cpp papara2_main.cpp blast_partassign.cpp align_utils.cpp ivy_mike/src/time.cpp ivy_mike/src/tree_parser.cpp ivy_mike/src/getopt.cpp ivy_mike/src/demangle.cpp ivy_mike/src/multiple_alignment.cpp ublasJama-1.0.2.3/EigenvalueDecomposition.cpp -lpthread

#-I/usr/include/boost141/

Expand Down
2 changes: 1 addition & 1 deletion ivy_mike
8 changes: 7 additions & 1 deletion papara.cpp
Expand Up @@ -226,6 +226,11 @@ add_log_sink::~add_log_sink() {
ls_buf.remove_sink(s_);
}


std::string papara::get_version_string() {
return std::string( "2.3" );
}

template<typename seq_tag>
queries<seq_tag>::queries( const std::string &opt_qs_name ) {

Expand Down Expand Up @@ -442,7 +447,7 @@ references<pvec_t,seq_tag>::references(const char* opt_tree_name, const char* op
{

//std::cerr << "papara_nt instantiated as: " << typeid(*this).name() << "\n";
lout << "papara_nt instantiated as: " << ivy_mike::demangle(typeid(*this).name()) << "\n";
lout << "references container instantiated as: " << ivy_mike::demangle(typeid(*this).name()) << "\n";



Expand Down Expand Up @@ -1008,6 +1013,7 @@ void driver<pvec_t,seq_tag>::calc_scores(size_t n_threads, const my_references&
//
ivy_mike::timer t1;
ivy_mike::thread_group tg;
lout << "papara_core version " << papara::get_version_string() << std::endl;
lout << "start scoring, using " << n_threads << " threads" << std::endl;

typedef worker<seq_tag> worker_t;
Expand Down
2 changes: 1 addition & 1 deletion papara.h
Expand Up @@ -1073,7 +1073,7 @@ bool file_exists(const char *filename)
} // end anonymous namespace for inline util functions (move to impl file!)



std::string get_version_string();
} // end of namespace papara

#endif
Expand Down
15 changes: 10 additions & 5 deletions papara2_main.cpp
Expand Up @@ -37,7 +37,7 @@ void print_banner( std::ostream &os ) {
os << "| __/ _` | __/ _` | // _` |\n";
os << "| | | (_| | | | (_| | |\\ \\ (_| |\n";
os << "\\_| \\__,_\\_| \\__,_\\_| \\_\\__,_|\n";
os << " Version 2.3\n";
os << " papara_core version " << papara::get_version_string() << "\n";

}

Expand Down Expand Up @@ -215,11 +215,15 @@ void run_papara( const std::string &qs_name, const std::string &alignment_name,
}


void print_commandline( std::ostream &os, char **argv, int argc ) {

int main( int argc, char *argv[] ) {
std::stringstream ss;
std::copy( argv, argv + argc, std::ostream_iterator<char *>( os, " " ) );
os << std::endl;
}

// aligned_buffer<int> xxx(1024);

int main( int argc, char *argv[] ) {


namespace igo = ivy_mike::getopt;
Expand Down Expand Up @@ -355,7 +359,8 @@ int main( int argc, char *argv[] ) {

papara::add_log_tee papara_log_file( logs );


papara::lout << "papara called as:\n";
print_commandline( papara::lout, argv, argc );

const bool ref_gaps = !opt_no_ref_gaps;

Expand Down
5 changes: 3 additions & 2 deletions phy_cut_partition.cpp
Expand Up @@ -56,7 +56,8 @@ int main( int argc, char *argv[] ) {
std::ostream &os = std::cout;

size_t name_width = lp.max_name_len() + 1;


os << lp.size() << " " << col_max - col_min << "\n";
for( int i = 0, size = lp.size(); i < size; ++i ) {
std::string name = lp.name_at(i);

Expand All @@ -74,4 +75,4 @@ int main( int argc, char *argv[] ) {
}


}
}
151 changes: 151 additions & 0 deletions phy_megamerge.cpp
@@ -0,0 +1,151 @@
/*
* Copyright (C) 2013 Simon A. Berger
*
* This file is part of papara.
*
* papara is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* papara is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with papara. If not, see <http://www.gnu.org/licenses/>.
*/



#include <iostream>
#include <fstream>
#include <stdexcept>
#include <iomanip>
#include <vector>
#include <string>
#include <set>
#include <map>

#include "ivymike/large_phylip.h"


void collect_name_prefixes( std::map<std::string, std::vector<std::pair<size_t,size_t> > > &name_prefixes, std::set<std::string> &non_prefixed_names, const ivy_mike::large_phylip &lp, size_t lp_num ) {

const size_t lp_size = lp.size();

for( size_t i = 0; i < lp_size; ++i ) {
std::string name = lp.name_at(i);

if( name.find("motu_cluster_") == 0 ) {
size_t epos = name.find( "." );

assert( epos != name.npos );
std::string prefix = name.substr(0, epos);

//name_prefixes.insert( prefix );
std::vector<std::pair<size_t,size_t> > &location_list = name_prefixes[prefix];

location_list.push_back(std::make_pair( lp_num, i ) );
} else {
non_prefixed_names.insert( name );
}
}
}


std::string merge_nongap_columns( const std::vector<ivy_mike::large_phylip *> &phys, const std::vector<std::pair<size_t,size_t> > &matches ) {
const size_t seq_len = phys.front()->sequence_len();

std::string out_seq;


out_seq.reserve( seq_len );



for( size_t i = 0; i < seq_len; ++i ) {
char c = '-';


for( std::vector< std::pair< size_t, size_t > >::const_iterator it = matches.begin(); it != matches.end(); ++it ) {
const ivy_mike::large_phylip &lp = *phys.at( it->first );
char *base_ptr = (char *)lp.sequence_begin_at( it->second );

if( base_ptr[i] != '-' ) {
if( c != '-' ) {
std::cerr << "meeeeeeeeeep: conflict!" << std::endl;
throw std::runtime_error( "bailing out\n" );
}

c = base_ptr[i];
}

}

out_seq.push_back( c );

}

return out_seq;
}

int main( int argc, char *argv[] ) {
std::vector<ivy_mike::large_phylip *> phys;
std::map<std::string, std::vector<std::pair<size_t,size_t> > > name_prefixes;
std::set<std::string> non_prefixed_names;

for( size_t i = 1; i < size_t(argc); ++i ) {
phys.push_back( new ivy_mike::large_phylip(argv[i]));

collect_name_prefixes( name_prefixes, non_prefixed_names, *phys.back(), phys.size() - 1 );
}

size_t max_name_len = 0;

std::cout << "non prefix: " << non_prefixed_names.size() << "\n";
for( std::set< std::string >::iterator it = non_prefixed_names.begin(); it != non_prefixed_names.end(); ++it ) {
max_name_len = std::max( max_name_len, it->size() );
}



for( std::map< std::string, std::vector< std::pair< size_t, size_t > > >::iterator it = name_prefixes.begin(); it != name_prefixes.end(); ++it ) {
max_name_len = std::max( max_name_len, it->first.size() );

std::cout << it->first << ":";
for( std::vector< std::pair< size_t, size_t > >::iterator it2 = it->second.begin(); it2 != it->second.end(); ++it2 ) {
std::cout << " " << it2->first << "/" << it2->second;
}

std::cout << "\n";
}

std::ofstream os( "merged.phy" );



{
ivy_mike::large_phylip &temp = *phys.front();

os << non_prefixed_names.size() + name_prefixes.size() << " " << temp.sequence_len() << "\n";

for( std::set< std::string >::iterator it = non_prefixed_names.begin(); it != non_prefixed_names.end(); ++it ) {
size_t idx = temp.getIdx( it->c_str() );

os << std::setw( max_name_len + 1 ) << std::left << *it;
std::copy( temp.sequence_begin_at(idx), temp.sequence_end_at(idx), std::ostream_iterator<char>(os) );
os << "\n";
}
}

for( std::map< std::string, std::vector< std::pair< size_t, size_t > > >::iterator it = name_prefixes.begin(); it != name_prefixes.end(); ++it ) {
os << std::setw( max_name_len + 1 ) << std::left << it->first;

std::string merged_seq = merge_nongap_columns( phys, it->second );
std::copy( merged_seq.begin(), merged_seq.end(), std::ostream_iterator<char>(os) );
os << "\n";
}

}
14 changes: 8 additions & 6 deletions stepwise_align.h
Expand Up @@ -790,7 +790,7 @@ class pvec_aligner_vec {
size_t block_start_outer = a_start_idx;
//ptr_block_outer.s_iter = s_.base();
//ptr_block_outer.si_iter = si_.base();

//std::cerr << "sm_inc: " << &(*sm_inc_prof_.begin()) << std::endl;


ticks ticks1 = getticks();
Expand Down Expand Up @@ -841,14 +841,16 @@ class pvec_aligner_vec {

//aiter a_aux_end_this = ptr_block.a_aux_prof_iter + W * block_width;

score_t * __restrict s_iter = s_.base();
score_t * __restrict si_iter = si_.base();
score_t * s_iter = s_.base();
score_t * si_iter = si_.base();
// score_t * __restrict a_aux_prof_iter = &(*(a_aux_start + block_start * W));
// score_t * __restrict a_aux_prof_end = &(*(a_aux_start + block_end * W));
score_t * __restrict sm_inc_iter = &(*(sm_inc_prof_.begin() + (*it_b) * av_size_all + block_start * W));
score_t * __restrict sm_inc_end = &(*(sm_inc_prof_.begin() + (*it_b) * av_size_all + block_end * W));
score_t * sm_inc_iter = &(*(sm_inc_prof_.begin() + (*it_b) * av_size_all + block_start * W));
score_t * sm_inc_end = &(*(sm_inc_prof_.begin() + (*it_b) * av_size_all + block_end * W));



_mm_prefetch( (const char *)sm_inc_iter, _MM_HINT_T0 );
// _mm_prefetch( (const char *)sm_inc_iter, _MM_HINT_T0 );


vec_t last_sdiag = vu::load( &(*block_sdiag_it));
Expand Down
20 changes: 17 additions & 3 deletions vec_unit.h
Expand Up @@ -422,12 +422,26 @@ struct vector_unit<int, 4> {
return _mm_max_epi32( a, b );
#else
//#warning "probably untested code!"
assert(0);
const vec_t ma = _mm_cmpgt_epi32( a, b );
return _mm_or_si128( _mm_and_si128( ma, a ), _mm_andnot_si128( ma, b ) );
const vec_t ret = _mm_or_si128( _mm_and_si128( ma, a ), _mm_andnot_si128( ma, b ) );
#if 0
println( a );
println( b );
println( ret );

assert(0);
#endif
return ret;
#endif
}


static inline void println( const vec_t & v ) {

T tmp[W];
_mm_storeu_si128( (vec_t*)tmp, v );
printf( "%d %d %d %d\n", tmp[0], tmp[1], tmp[2], tmp[3] );
}

static inline const vec_t abs_diff( const vec_t &a, const vec_t &b ) {
// i don't really like this function, as ideally this would just be abs(sub(a,b)),
// but there doesn't seem to be a fast way to implement abs on pre SSSSSSE3.
Expand Down

0 comments on commit a0ac782

Please sign in to comment.