Skip to content

Commit

Permalink
enable major/minor string finding over 2 snp feature objects
Browse files Browse the repository at this point in the history
  • Loading branch information
Soeren Sonnenburg committed Jul 22, 2011
1 parent 35719a8 commit 0f1f5d9
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 17 deletions.
36 changes: 22 additions & 14 deletions src/shogun/features/SNPFeatures.cpp
Expand Up @@ -208,7 +208,7 @@ void CSNPFeatures::add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t
strings->free_feature_vector(vec, vec_idx1, free_vec1);
}

void CSNPFeatures::obtain_base_strings()
void CSNPFeatures::find_minor_major_strings(uint8_t* minor, uint8_t* major)
{
for (int32_t i=0; i<num_strings; i++)
{
Expand All @@ -217,29 +217,37 @@ void CSNPFeatures::obtain_base_strings()
uint8_t* vec = ((CStringFeatures<uint8_t>*) strings)->get_feature_vector(i, len, free_vec);
ASSERT(string_length==len);

if (i==0)
{
size_t tlen=(len+1)*sizeof(uint8_t);
m_str_min=(uint8_t*) SG_MALLOC(tlen);
m_str_maj=(uint8_t*) SG_MALLOC(tlen);
memset(m_str_min, 0, tlen);
memset(m_str_maj, 0, tlen);
}

for (int32_t j=0; j<len; j++)
{
// skip sequencing errors
if (vec[j]=='0')
continue;

if (m_str_min[j]==0)
m_str_min[j]=vec[j];
else if (m_str_maj[j]==0 && vec[j]!=m_str_min[j])
m_str_maj[j]=vec[j];
if (minor[j]==0)
minor[j]=vec[j];
else if (major[j]==0 && vec[j]!=minor[j])
major[j]=vec[j];
}

((CStringFeatures<uint8_t>*) strings)->free_feature_vector(vec, i, free_vec);
}
}

void CSNPFeatures::obtain_base_strings(CSNPFeatures* snp)
{
SG_FREE(m_str_min);
SG_FREE(m_str_maj);
size_t tlen=(string_length+1)*sizeof(uint8_t);

m_str_min=(uint8_t*) SG_MALLOC(tlen);
m_str_maj=(uint8_t*) SG_MALLOC(tlen);
memset(m_str_min, 0, tlen);
memset(m_str_maj, 0, tlen);

find_minor_major_strings(m_str_min, m_str_maj);

if (snp)
snp->find_minor_major_strings(m_str_min, m_str_maj);

for (int32_t j=0; j<string_length; j++)
{
Expand Down
18 changes: 15 additions & 3 deletions src/shogun/features/SNPFeatures.h
Expand Up @@ -212,9 +212,12 @@ class CSNPFeatures : public CDotFeatures
return (char*) m_str_maj;
}

/** compute the base strings from current strings */
void obtain_base_strings();

/** (re)compute the base strings of this object from its current
* strings, optionally also scanning the strings of a second SNP
* feature object
*
* Any previously stored base strings are freed and replaced by freshly
* allocated, zero-initialized buffers before the scan starts.
*
* @param snp optional second feature object; when non-NULL its strings
* are scanned as well, into the same buffers, so they contribute to the
* base strings stored in this object
*/
void obtain_base_strings(CSNPFeatures* snp=NULL);

/** @return object name, the constant string "SNPFeatures" */
inline virtual const char* get_name() const { return "SNPFeatures"; }
Expand All @@ -228,6 +231,15 @@ class CSNPFeatures : public CDotFeatures
*/
static SGMatrix<float64_t> get_2x3_table(CSNPFeatures* pos, CSNPFeatures* neg);

private:
/** determine minor and major base strings from current strings
*
* Every string is scanned position by position. Symbols equal to '0'
* are skipped (treated as sequencing errors). For each position j the
* first remaining symbol encountered is stored in minor[j]; the first
* symbol encountered afterwards that differs from minor[j] is stored in
* major[j]. A zero entry means "not set yet", so entries that are
* already non-zero on entry are left untouched.
*
* @arg minor - array of at least string_length entries, initialized
* with zero before the first scan, that will contain the minor base
* string
* @arg major - array of at least string_length entries, initialized
* with zero before the first scan, that will contain the major base
* string
*/
void find_minor_major_strings(uint8_t* minor, uint8_t* major);

protected:
/** stringfeatures the wdfeatures are based on*/
CStringFeatures<uint8_t>* strings;
Expand Down

0 comments on commit 0f1f5d9

Please sign in to comment.