Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Fetching contributors…

Cannot retrieve contributors at this time

706 lines (504 sloc) 20.967 kb
#ifndef __pvec_h
#define __pvec_h
//#define BOOST_UBLAS_NDEBUG
#include <boost/numeric/ublas/matrix.hpp>
#include <boost/numeric/ublas/banded.hpp>
#include <boost/numeric/ublas/io.hpp>
#include <boost/numeric/ublas/lu.hpp>
#include <boost/numeric/ublas/triangular.hpp>
#include <EigenvalueDecomposition.hpp>
#include <cassert>
#include <algorithm>
#include <iostream>
//#define USE_CBLAS
#ifdef USE_CBLAS
extern "C" {
#include <cblas.h>
}
#endif
#include "ivymike/tree_traversal_utils.h"
#include "parsimony.h"
#include "ivymike/stupid_ptr.h"
#include "ivymike/algorithm.h"
#include "sequence_model.h"
namespace {
//using ivy_mike::tip_case;
using ivy_mike::TIP_TIP;
using ivy_mike::TIP_INNER;
using ivy_mike::INNER_INNER;
}
class pvec_cgap {
// aligned_buffer<parsimony_state> v;
std::vector<parsimony_state> v;
std::vector<int> auxv;
template<typename seq_model>
class seq2aux {
public:
int operator()( parsimony_state c ) {
if( seq_model::pstate_is_gap(c) ) {
return AUX_CGAP;
} else {
return 0;
}
}
};
public:
void init( const std::vector<uint8_t> &seq ) {
// assert( v.size() == 0 );
v.resize(seq.size());
auxv.resize( seq.size() );
std::transform( seq.begin(), seq.end(), v.begin(), dna_parsimony_mapping::d2p );
std::transform( seq.begin(), seq.end(), auxv.begin(), dna_parsimony_mapping::d2aux );
}
template<typename seq_model>
void init2( const std::vector<uint8_t> &seq, const seq_model &sm ) {
v.resize(seq.size());
auxv.resize(seq.size());
std::transform( seq.begin(), seq.end(), v.begin(), seq_model::s2p );
std::transform( v.begin(), v.end(), auxv.begin(), seq2aux<seq_model>() );
std::cerr << ">>>>>>>>>>>>>>>> WARNING: untested strange code!!!\n";
}
inline const std::vector<parsimony_state> &get_v() {
return v;
}
inline const std::vector<int> &get_auxv() {
return auxv;
}
static void newview( pvec_cgap &p, pvec_cgap &c1, pvec_cgap &c2, double /*z1*/, double /*z2*/, ivy_mike::tip_case tc ) {
if( c1.v.size() != c2.v.size() ) {
std::cout << "size2: " << c1.size() << " " << c2.size() << "\n";
}
assert( c1.v.size() == c2.v.size() );
if( c1.v.size() != c2.v.size() ) {
std::cerr << "not equal: " << c1.size() << " " << c2.size() << "\n";
throw std::runtime_error( "newview: vectors have different lengths (illegal incremetal newview on modified data?)" );
}
// p.v.resize(0);
p.v.resize(c1.v.size());
p.auxv.resize(c1.auxv.size());
for( size_t i = 0; i < c1.v.size(); i++ ) {
parsimony_state ps = c1.v[i] & c2.v[i];
if( ps == 0 ) {
ps = c1.v[i] | c2.v[i];
}
//p.v.push_back( ps );
p.v[i] = ps;
const int a1 = c1.auxv[i];
const int a2 = c2.auxv[i];
const bool cgap1 = (a1 & AUX_CGAP) != 0;
const bool cgap2 = (a2 & AUX_CGAP) != 0;
// const bool open1 = (a1 & AUX_OPEN) != 0;
// const bool open2 = (a2 & AUX_OPEN) != 0;
p.auxv[i] = 0;
if( tc == TIP_TIP ) {
if( cgap1 && cgap2 ) {
p.auxv[i] = AUX_CGAP;
} else if( cgap1 != cgap2 ) {
p.auxv[i] = AUX_CGAP | AUX_OPEN;
}
} else if( tc == TIP_INNER ) {
if( cgap1 && cgap2 ) {
p.auxv[i] = AUX_CGAP;
} else if( cgap1 != cgap2 ) {
p.auxv[i] = AUX_CGAP | AUX_OPEN;
}
} else {
if( a1 == AUX_CGAP && a2 == AUX_CGAP ) {
p.auxv[i] = AUX_CGAP;
} else if( a1 == AUX_CGAP || a2 == AUX_CGAP ) {
p.auxv[i] = AUX_CGAP | AUX_OPEN;
}
}
}
}
inline size_t size() {
return v.size();
}
template<typename vt>
inline void to_int_vec( std::vector<vt> &outv ) {
outv.resize( v.size() );
std::copy( v.begin(), v.end(), outv.begin() );
}
template<typename oiter_, size_t STRIDE>
inline void to_int_vec_strided( oiter_ out ) {
//outv.assign( v.begin(), v.end() );
for( std::vector< parsimony_state >::iterator it = v.begin(); it != v.end(); ++it, out += STRIDE ) {
*out = *it;
}
// outv.resize( v.size() );
// std::copy( v.begin(), v.end(), outv.begin() );
}
template<typename vt>
inline void to_aux_vec( std::vector<vt> &outv ) {
// std::cout << "v: " << v.size() << "\n";
outv.resize( v.size() );
std::copy( auxv.begin(), auxv.end(), outv.begin() );
// std::for_each( auxv.begin(), auxv.end(), ostream_test(std::cout) );
}
template<typename oiter_, size_t STRIDE>
inline void to_aux_vec_strided( oiter_ out ) {
// std::cout << "v: " << v.size() << "\n";
for( std::vector< int >::iterator it = auxv.begin(); it != auxv.end(); ++it, out += STRIDE ) {
if( *it == AUX_CGAP ) {
*out = 0xFFFF;
} else {
*out = 0x0;
}
}
// std::transform( auxv.begin(), auxv.end(), out );
// std::for_each( auxv.begin(), auxv.end(), ostream_test(std::cout) );
}
bool operator==( const pvec_cgap &other ) const {
return v == other.v && auxv == other.auxv;
}
bool operator!=( const pvec_cgap &other ) const {
return !(*this == other);
}
};
class probgap_model {
boost::numeric::ublas::matrix<double> m_evecs;
boost::numeric::ublas::vector<double> m_evals;
boost::numeric::ublas::matrix<double> m_evecs_inv;
// ublas::diagonal_matrix<double> m_evals_diag; // used temporarily.
// ublas::matrix<double> m_prob_matrix;
// ublas::matrix<double> m_temp_matrix;
double m_gap_freq;
bool m_valid;
double calc_gap_freq ( const std::vector< std::vector< uint8_t > > &seqs ) {
size_t ngaps = 0;
size_t nres = 0;
for( std::vector< std::vector< uint8_t > >::const_iterator it = seqs.begin(); it != seqs.end(); ++it ) {
nres += it->size();
ngaps += std::count_if( it->begin(), it->end(), dna_parsimony_mapping::is_gap );
}
double rgap = double(ngaps) / nres;
std::cout << "gap rate: " << ngaps << " " << nres << "\n";
std::cout << "gap rate: " << rgap << "\n";
return rgap;
}
public:
probgap_model() : m_valid(false) {}
probgap_model( const std::vector< std::vector<uint8_t> > &seqs ) : m_valid(false) {
reset( seqs );
}
void reset( const std::vector< std::vector<uint8_t> > &seqs ) {
// initialize probgap model from input sequences
reset( calc_gap_freq( seqs ) );
}
void reset( double gap_freq ) {
namespace ublas = boost::numeric::ublas;
m_gap_freq = gap_freq;
double f[2] = {m_gap_freq, 1-m_gap_freq};
//double f[2] = {1-m_gap_freq, m_gap_freq};
ublas::matrix<double> rate_matrix(2,2);
rate_matrix(0,0) = -f[0];
rate_matrix(0,1) = f[0];
rate_matrix(1,0) = f[1];
rate_matrix(1,1) = -f[1];
std::cout << "rate matrix: " << rate_matrix << "\n";
ublas::EigenvalueDecomposition ed(rate_matrix);
m_evecs = ed.getV();
m_evals = ed.getRealEigenvalues();
// use builtin ublas lu-factorization rather than jama
{
ublas::matrix<double> A(m_evecs);
ublas::permutation_matrix<size_t> pm( A.size1() );
size_t res = ublas::lu_factorize(A,pm);
if( res != 0 ) {
throw std::runtime_error( " ublas::lu_factorize failed" );
}
m_evecs_inv = ublas::identity_matrix<double>( A.size1());
ublas::lu_substitute(A,pm,m_evecs_inv);
}
m_valid = true;
}
boost::numeric::ublas::matrix<double> setup_pmatrix( double t ) {
namespace ublas = boost::numeric::ublas;
ublas::diagonal_matrix<double> evals_diag(2);
ublas::matrix<double> prob_matrix(2,2);
ublas::matrix<double> temp_matrix(2,2);
for( int i = 0; i < 2; i++ ) {
evals_diag(i,i) = exp( t * m_evals(i));
}
prob_matrix = ublas::prod( ublas::prod( m_evecs, evals_diag, temp_matrix ), m_evecs_inv );
//m_prob_matrix = ublas::prod( ublas::prod( m_evecs, m_evals_diag ), m_evecs_inv );
// std::cout << "pmatrix: " << m_prob_matrix << "\n";
// throw std::runtime_error( "xxx" );
return prob_matrix;
}
inline double gap_freq() { return m_gap_freq; }
};
class pvec_pgap {
// aligned_buffer<parsimony_state> v;
std::vector<parsimony_state> v;
boost::numeric::ublas::matrix<double> gap_prob;
public:
static ivy_mike::stupid_ptr<probgap_model> pgap_model;
inline const std::vector<parsimony_state> &get_v() {
return v;
}
inline const boost::numeric::ublas::matrix<double> &get_gap_prob() {
return gap_prob;
}
void init( const std::vector<uint8_t> &seq ) {
//assert( v.size() == 0 );
v.resize(seq.size());
std::transform( seq.begin(), seq.end(), v.begin(), dna_parsimony_mapping::d2p );
//std::transform( seq.begin(), seq.end(), auxv.begin(), dna_parsimony_mapping::d2aux );
gap_prob.resize(2, seq.size());
for( size_t i = 0; i < seq.size(); i++ ) {
// ( 0 )
// ( 1 ) means gap
if( v[i] == 0xf ) {
gap_prob( 0, i ) = 0.0;
gap_prob( 1, i ) = 1.0;
} else {
gap_prob( 0, i ) = 1.0;
gap_prob( 1, i ) = 0.0;
}
}
}
template<typename seq_model>
void init2( const std::vector<uint8_t> &seq, seq_model sm ) {
//assert( v.size() == 0 );
v.resize(seq.size());
std::transform( seq.begin(), seq.end(), v.begin(), seq_model::s2p );
//std::transform( seq.begin(), seq.end(), auxv.begin(), dna_parsimony_mapping::d2aux );
gap_prob.resize(2, seq.size());
for( size_t i = 0; i < seq.size(); i++ ) {
// ( 0 )
// ( 1 ) means gap
if( seq_model::pstate_is_gap(v[i]) ) {
gap_prob( 0, i ) = 0.0;
gap_prob( 1, i ) = 1.0;
} else {
gap_prob( 0, i ) = 1.0;
gap_prob( 1, i ) = 0.0;
}
}
}
void newview( const pvec_pgap &c1, const pvec_pgap &c2, double z1, double z2, ivy_mike::tip_case tc ) {
newview( *this, c1, c2, z1, z2, tc );
//std::cout << "newview: " << gap_prob.size1() << "\n";
}
static void newview( pvec_pgap &p, const pvec_pgap &c1, const pvec_pgap &c2, double z1, double z2, ivy_mike::tip_case tc ) {
namespace ublas = boost::numeric::ublas;
assert( c1.v.size() == c2.v.size() );
if( c1.v.size() != c2.v.size() ) {
std::cerr << "not equal: " << c1.size() << " " << c2.size() << "\n";
throw std::runtime_error( "newview: vectors have different lengths (illegal incremetal newview on modified data?)" );
}
// p.v.resize(0);
p.v.resize(c1.v.size());
//p.gap_prob.resize(2, c1.v.size());
assert( pgap_model.is_valid_ptr() );
ublas::matrix<double> p1 = pgap_model->setup_pmatrix(z1);
ublas::matrix<double> p2 = pgap_model->setup_pmatrix(z2);
#if 1
ublas::matrix<double> t1 = ublas::prod(p1, c1.gap_prob);
ublas::matrix<double> t2 = ublas::prod(p2, c2.gap_prob);
#else
ublas::matrix<double> t1(c1.gap_prob.size1(), c1.gap_prob.size2(),0 );
ublas::matrix<double> t2(c2.gap_prob.size1(), c2.gap_prob.size2(),0 );
{
double *p1x = p1.data().begin();
double *p2x = p2.data().begin();
double *c1x = c1.gap_prob.data().begin();
double *c2x = c2.gap_prob.data().begin();
double *t1bx = t1.data().begin();
double *t2bx = t2.data().begin();
size_t m = p1.size2();
size_t n = c1.gap_prob.size2();
size_t k = p1.size1();
cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, 1, p1x, m, c1x, n, 1, t1bx, n);
cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, 1, p2x, m, c2x, n, 1, t2bx, n);
}
#endif
p.gap_prob = ublas::element_prod( t1, t2 );
const static double twotothe256 = 115792089237316195423570985008687907853269984665640564039457584007913129639936.0;
/* 2**256 (exactly) */
const static double minlikelihood = 1.0/twotothe256;
for( ublas::matrix<double>::iterator2 it = p.gap_prob.begin2(); it != p.gap_prob.end2(); ++it ) {
double &p1 = *it;
double &p2 = *(it.begin() + 1);
if( fabs(p1) < minlikelihood && fabs(p2) < minlikelihood ) {
// std::cout << "scale: " << p1 << " " << p2;
p1 *= twotothe256;
p2 *= twotothe256;
// std::cout << " -> " << p1 << " " << p2 << "\n";
}
}
//ublas::matrix< double >::const_iterator1 xxx = p.gap_prob.begin1();
// xxx.
//std::cout << "pvec: " << *xxx << "\n";
// ublas::matrix< double >::iterator1 tit1 = p.gap_prob.begin1();
//
//
// boost::io::ios_all_saver ioss(std::cout);
// std::cout << std::setprecision(2);
//
// std::cout << std::left;
//
// // std::transform( tit1.begin(), tit1.end(), tit1.begin(), log );
// std::copy( tit1.begin(), tit1.end(), std::ostream_iterator<double>(std::cout, " "));
// std::cout << "\n";
// ++tit1;
// // std::transform( tit1.begin(), tit1.end(), tit1.begin(), log );
// std::copy( tit1.begin(), tit1.end() + 4, std::ostream_iterator<double>(std::cout, " "));
// std::cout << "\n";
for( size_t i = 0; i < c1.v.size(); i++ ) {
parsimony_state ps = c1.v[i] & c2.v[i];
if( ps == 0 ) {
ps = c1.v[i] | c2.v[i];
}
//p.v.push_back( ps );
p.v[i] = ps;
}
}
inline size_t size() const {
return v.size();
}
template<typename vt>
inline void to_int_vec( std::vector<vt> &outv ) {
outv.assign( v.begin(), v.end() );
// outv.resize( v.size() );
// std::copy( v.begin(), v.end(), outv.begin() );
}
static inline double gap_posterior( double v1, double v2 ) {
assert( pgap_model.is_valid_ptr() );
//return v1 / (v1 + v2);
v1 *= 1 - pgap_model->gap_freq();
// throw std::runtime_error( "i think there is an error in this function. why v1 in the next line?");
v2 *= pgap_model->gap_freq();
float v = float(v1 / (v1 + v2));
if( v != v ) {
std::cerr << "meeeeep: " << v1 << " " << v2 << "\n";
throw std::runtime_error("bailing out.");
}
return v;
}
static inline double gap_ancestral_probability( double v1, double v2 ) {
assert( pgap_model.is_valid_ptr() );
//return v1 / (v1 + v2);
const double freq_ngap = 1 - pgap_model->gap_freq();
const double freq_gap = pgap_model->gap_freq();
return v2 * freq_gap / (v1 * freq_ngap + v2 * freq_gap);
}
inline void to_gap_post_vec( std::vector<double> &outv ) {
const boost::numeric::ublas::matrix<double> &t = get_pgap();
outv.clear();
outv.resize(v.size());
ivy_mike::binary_twizzle(t.begin2(), t.end2(), (t.begin1() + 1).begin(), outv.begin(), gap_posterior );
}
template<typename oiter>
inline void to_ancestral_gap_prob( oiter it ) {
const boost::numeric::ublas::matrix<double> &t = get_pgap();
std::transform(t.begin2(), t.end2(), (t.begin1() + 1).begin(), it, gap_ancestral_probability );
}
template<typename vt>
inline void to_aux_vec( std::vector<vt> &outv ) {
// std::cout << "v: " << v.size() << "\n";
// {
// std::vector<double> postv;
// to_gap_post_vec(postv);
//
//
//
// //std::copy( postv.begin(), postv.end(), std::ostream_iterator<double>(std::cout, " " ));
//
// std::transform( postv.begin(), postv.end(), std::ostream_iterator<int>(std::cout), scaler<double>(10,0,9) );
// std::cout << "\n";
// }
outv.clear();
outv.reserve(v.size());
// outv.resize( v.size() );
// std::fill( outv.begin(), outv.end(), 0 );
// std::for_each( auxv.begin(), auxv.end(), ostream_test(std::cout) );
namespace ublas = boost::numeric::ublas;
ublas::matrix<double> t = get_pgap();
// yeah! metaprogramming massacre!
ublas::matrix< double >::iterator1 tit1 = t.begin1();
#if 0
std::vector<double> odds;
odds.reserve(t.size2());
// NOTE: t.begin1() + 1).begin() is the boost::ublas way of saying "iterator over the second row"
ivy_mike::binary_twizzle( t.begin2(), t.end2(), (t.begin1() + 1).begin(), std::back_inserter(odds), std::divides<double>() );
std::transform( odds.begin(), odds.end(), odds.begin(), std::ptr_fun<double>(log) );
//std::transform( odds.begin(), odds.end(), std::back_inserter(outv), std::bind1st( std::greater<double>(), -1.0 ) );
const double odds_threshold = 0.0;
std::transform( odds.begin(), odds.end(), std::back_inserter(outv), std::bind2nd( std::less<double>(), odds_threshold ) );
// std::fill( outv.begin(), outv.end(), 1 );
#else
// set outv[i] == 1 if, prob(non-gap) is less than prob(gap)
// this is equivalent to the above code with odds_threshold == 0.0
ivy_mike::binary_twizzle( t.begin2(), t.end2(), (t.begin1() + 1).begin(), std::back_inserter(outv), std::less<double>() );
#endif
#if 0
// std::transform( t.begin2(), t.end2(), odds.begin(), calc_odds<ublas::matrix< double > > );
//boost::io::ios_all_saver ioss(std::cout);
std::cout << std::setprecision(0);
// std::cout << std::setw(5);
std::cout << std::fixed;
std::cout << std::left;
// std::cout << t(0,0) << t(0,1) << t(0,2) <<t(0,3) <<t(0,4) << "\n";
//std::copy( odds.begin(), odds.end(), std::ostream_iterator<double>(std::cout, " "));
std::copy( bias.begin(), bias.end(), std::ostream_iterator<int>(std::cout));
std::cout << "\n";
std::copy( lomag.begin(), lomag.end(), std::ostream_iterator<unsigned char>(std::cout));
std::cout << "\n";
std::transform( tit1.begin(), tit1.end(), tit1.begin(), log );
std::copy( tit1.begin(), tit1.end(), std::ostream_iterator<double>(std::cout, " "));
std::cout << "\n";
++tit1;
std::transform( tit1.begin(), tit1.end(), tit1.begin(), log );
std::copy( tit1.begin(), tit1.end(), std::ostream_iterator<double>(std::cout, " "));
std::cout << "\n";
#endif
}
// template<typename oiter_, size_t STRIDE>
// class strided_inserter {
// typedef std::iterator_traits<oiter_> traits;
// typedef typename traits::value_type reference_type;
//
// oiter_ out;
// public:
// strided_inserter( oiter_ out_ ) : out(out_) {}
//
// strided_inserter<oiter_,STRIDE> &operator=( reference_type v ) {
// *out = v;
// }
// strided_inserter<oiter_,STRIDE> &operator*() {
// return *this;
// }
//
// strided_inserter<oiter_,STRIDE> &operator++() {
// out+=STRIDE;
// return *this;
// }
//
// strided_inserter<oiter_,STRIDE> operator++(int w) {
// out+=STRIDE * w;
// return *this;
// }
//
//
// };
//
// template<typename oiter_, size_t STRIDE>
// inline void to_aux_vec( oiter_ out ) {
//
// ublas::matrix<double> t = get_pgap();
//
// // yeah! metaprogramming massacre!
//
// ublas::matrix< double >::iterator1 tit1 = t.begin1();
//
// // set outv[i] == 1 if, prob(non-gap) is less than prob(gap)
// // this is equivalent to the above code with odds_threshold == 0.0
// ivy_mike::binary_twizzle( t.begin2(), t.end2(), (t.begin1() + 1).begin(), strided_inserter<oiter_,STRIDE>(out), std::less<double>() );
// }
const boost::numeric::ublas::matrix<double> &get_pgap() {
return gap_prob;
}
};
#endif
Jump to Line
Something went wrong with that request. Please try again.