Merge pull request #248 from gsomix/master
Arrays are replaced by SGVector in CGaussianNaiveBayes
Soeren Sonnenburg committed Jul 29, 2011
2 parents 29575c9 + af57ba7 commit ccd86e9
Showing 2 changed files with 44 additions and 37 deletions.
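The change is mechanical throughout: each raw float64_t* member becomes an SGVector<float64_t>, allocations fill the vector and vlen fields instead of a bare pointer, and SG_FREE calls are replaced by destroy_vector(). A minimal sketch of the pattern, using only the SGVector fields and calls that appear in the diff below (num_classes and dim stand in for the members m_num_classes and m_dim):

    // before: bare pointer; the length must be tracked separately
    float64_t* means = SG_MALLOC(float64_t, num_classes*dim);
    // ... use means ...
    SG_FREE(means);

    // after: SGVector bundles the pointer with its length
    SGVector<float64_t> means;
    means.vector = SG_MALLOC(float64_t, num_classes*dim);
    means.vlen = num_classes*dim;
    // ... use means.vector ...
    means.destroy_vector();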
71 changes: 39 additions & 32 deletions src/shogun/classifier/GaussianNaiveBayes.cpp
@@ -19,16 +19,16 @@ using namespace shogun;

CGaussianNaiveBayes::CGaussianNaiveBayes() :
CMachine(), m_features(NULL), m_min_label(0),
-m_num_classes(0), m_dim(0), m_means(NULL),
-m_variances(NULL), m_label_prob(NULL), m_rates(NULL)
+m_num_classes(0), m_dim(0), m_means(),
+m_variances(), m_label_prob(), m_rates()
{

};

CGaussianNaiveBayes::CGaussianNaiveBayes(CFeatures* train_examples, CLabels* train_labels) :
CMachine(), m_features(NULL), m_min_label(0),
-m_num_classes(0), m_dim(0), m_means(NULL),
-m_variances(NULL), m_label_prob(NULL), m_rates(NULL)
+m_num_classes(0), m_dim(0), m_means(),
+m_variances(), m_label_prob(), m_rates()
{
ASSERT(train_examples->get_num_vectors() == train_labels->get_num_labels());
set_labels(train_labels);
@@ -40,10 +40,11 @@ m_variances(NULL), m_label_prob(NULL), m_rates(NULL)
CGaussianNaiveBayes::~CGaussianNaiveBayes()
{
SG_UNREF(m_features);
-SG_FREE(m_means);
-SG_FREE(m_rates);
-SG_FREE(m_variances);
-SG_FREE(m_label_prob);
+
+m_means.destroy_vector();
+m_rates.destroy_vector();
+m_variances.destroy_vector();
+m_label_prob.destroy_vector();
};

bool CGaussianNaiveBayes::train(CFeatures* data)
@@ -82,29 +83,35 @@ bool CGaussianNaiveBayes::train(CFeatures* data)
m_dim = m_features->get_dim_feature_space();

// allocate memory for distributions' parameters and a priori probability
-m_means = SG_MALLOC(float64_t, m_num_classes*m_dim);
-m_variances = SG_MALLOC(float64_t, m_num_classes*m_dim);
-m_label_prob = SG_MALLOC(float64_t, m_num_classes);
+m_means.vector = SG_MALLOC(float64_t, m_num_classes*m_dim);
+m_means.vlen = m_num_classes*m_dim;
+
+m_variances.vector = SG_MALLOC(float64_t, m_num_classes*m_dim);
+m_variances.vlen = m_num_classes*m_dim;
+
+m_label_prob.vector = SG_MALLOC(float64_t, m_num_classes);
+m_label_prob.vlen = m_num_classes;

// allocate memory for label rates
-m_rates = SG_MALLOC(float64_t, m_num_classes);
+m_rates.vector = SG_MALLOC(float64_t, m_num_classes);
+m_rates.vlen = m_num_classes;

// assure that memory is allocated
-ASSERT(m_means);
-ASSERT(m_variances);
-ASSERT(m_rates);
-ASSERT(m_label_prob);
+ASSERT(m_means.vector);
+ASSERT(m_variances.vector);
+ASSERT(m_rates.vector);
+ASSERT(m_label_prob.vector);

// make arrays filled by zeros before using
for (i=0;i<m_num_classes*m_dim;i++)
{
-m_means[i] = 0.0;
-m_variances[i] = 0.0;
+m_means.vector[i] = 0.0;
+m_variances.vector[i] = 0.0;
}
for (i=0;i<m_num_classes;i++)
{
-m_label_prob[i] = 0.0;
-m_rates[i] = 0.0;
+m_label_prob.vector[i] = 0.0;
+m_rates.vector[i] = 0.0;
}

SGMatrix<float64_t> feature_matrix = m_features->get_computed_dot_feature_matrix();
@@ -113,37 +120,37 @@ bool CGaussianNaiveBayes::train(CFeatures* data)
for (i=0; i<train_labels.vlen; i++)
{
for (j=0; j<m_dim; j++)
-m_means[m_dim*train_labels.vector[i]+j]+=feature_matrix.matrix[i*m_dim+j];
+m_means.vector[m_dim*train_labels.vector[i]+j]+=feature_matrix.matrix[i*m_dim+j];

-m_label_prob[train_labels.vector[i]]+=1.0;
+m_label_prob.vector[train_labels.vector[i]]+=1.0;
}

// get means of features of labels
for (i=0; i<m_num_classes; i++)
{
for (j=0; j<m_dim; j++)
-m_means[m_dim*i+j] /= m_label_prob[i];
+m_means.vector[m_dim*i+j] /= m_label_prob.vector[i];
}

// compute squared residuals with means available
for (i=0; i<train_labels.vlen; i++)
{
for (j=0; j<m_dim; j++)
-m_variances[m_dim*train_labels.vector[i]+j]+=
-CMath::sq(feature_matrix.matrix[i*m_dim+j]-m_means[m_dim*train_labels.vector[i]+j]);
+m_variances.vector[m_dim*train_labels.vector[i]+j]+=
+CMath::sq(feature_matrix.matrix[i*m_dim+j]-m_means.vector[m_dim*train_labels.vector[i]+j]);
}

// get variance of features of labels
for (i=0; i<m_num_classes; i++)
{
for (j=0; j<m_dim; j++)
-m_variances[m_dim*i+j] /= m_label_prob[i] > 1 ? m_label_prob[i]-1 : 1;
+m_variances.vector[m_dim*i+j] /= m_label_prob.vector[i] > 1 ? m_label_prob.vector[i]-1 : 1;
}

// get a priori probabilities of labels
for (i=0; i<m_num_classes; i++)
{
-m_label_prob[i]/= m_num_classes;
+m_label_prob.vector[i]/= m_num_classes;
}

train_labels.free_vector();
@@ -193,25 +200,25 @@ float64_t CGaussianNaiveBayes::apply(int32_t idx)
for (i=0; i<m_num_classes; i++)
{
// set rate to 0.0 if a priori probability is 0.0 and continue
-if (m_label_prob[i]==0.0)
+if (m_label_prob.vector[i]==0.0)
{
-m_rates[i] = 0.0;
+m_rates.vector[i] = 0.0;
continue;
}
else
-m_rates[i] = m_label_prob[i];
+m_rates.vector[i] = m_label_prob.vector[i];

// product all conditional gaussian probabilities
for (k=0; k<m_dim; k++)
-m_rates[i]*= normal_exp(feature_vector.vector[k],i,k)/CMath::sqrt(m_variances[i*m_dim+k]);
+m_rates.vector[i]*= normal_exp(feature_vector.vector[k],i,k)/CMath::sqrt(m_variances.vector[i*m_dim+k]);
}

// find label with maximum rate
int32_t max_label_idx = 0;

for (i=0; i<m_num_classes; i++)
{
-if (m_rates[i]>m_rates[max_label_idx])
+if (m_rates.vector[i]>m_rates.vector[max_label_idx])
max_label_idx = i;
}

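The statistics that train() computes are unchanged by this commit: per-class counts and sums give the means, and Bessel-corrected squared residuals give the variances. A self-contained sketch of the same computation with plain arrays, for reference (the data and names here are hypothetical, not from the commit):

    #include <cstdio>

    int main()
    {
        // five 1-D training samples in two classes (hypothetical data)
        double x[] = {1.0, 2.0, 3.0, 10.0, 12.0};
        int    y[] = {0, 0, 0, 1, 1};
        const int n = 5, num_classes = 2;

        double mean[2] = {0.0}, var[2] = {0.0}, count[2] = {0.0};

        // accumulate per-class sums and counts, as in train()'s first loop
        for (int i = 0; i < n; i++)
        {
            mean[y[i]] += x[i];
            count[y[i]] += 1.0;
        }
        for (int c = 0; c < num_classes; c++)
            mean[c] /= count[c];

        // squared residuals, then divide by n-1 (Bessel's correction)
        for (int i = 0; i < n; i++)
            var[y[i]] += (x[i] - mean[y[i]]) * (x[i] - mean[y[i]]);
        for (int c = 0; c < num_classes; c++)
            var[c] /= count[c] > 1 ? count[c] - 1 : 1;

        for (int c = 0; c < num_classes; c++)
            printf("class %d: mean=%g var=%g\n", c, mean[c], var[c]);
        return 0;
    }

Expected output: class 0 has mean 2 and variance 1; class 1 has mean 11 and variance 2.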
10 changes: 5 additions & 5 deletions src/shogun/classifier/GaussianNaiveBayes.h
@@ -117,13 +117,13 @@ class CGaussianNaiveBayes : public CMachine
int32_t m_dim;

/// means for normal distributions of features
-float64_t* m_means;
+SGVector<float64_t> m_means;

/// variances for normal distributions of features
-float64_t* m_variances;
+SGVector<float64_t> m_variances;

/// a priori probabilities of labels
-float64_t* m_label_prob;
+SGVector<float64_t> m_label_prob;

/** computes gaussian exponent by x, indexes, m_means and m_variances
* @param x feature value
@@ -133,11 +133,11 @@
*/
float64_t inline normal_exp(float64_t x, int32_t l_idx, int32_t f_idx)
{
-return CMath::exp(-CMath::sq(x-m_means[m_dim*l_idx+f_idx])/(2*m_variances[m_dim*l_idx+f_idx]));
+return CMath::exp(-CMath::sq(x-m_means.vector[m_dim*l_idx+f_idx])/(2*m_variances.vector[m_dim*l_idx+f_idx]));
}

/// label rates
-float64_t* m_rates;
+SGVector<float64_t> m_rates;
};

}
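A note on the header's normal_exp: it returns only the exponent term exp(-(x-μ)²/(2σ²)), and apply() divides it by sqrt(σ²), so the constant factor 1/sqrt(2π) of the full Gaussian density is never computed. That factor is identical for every class, so the argmax over m_rates is unaffected. For comparison, the full density as a standalone sketch (gaussian_pdf is a hypothetical name, not part of the commit; M_PI is assumed available from <cmath>, as on POSIX systems):

    #include <cmath>

    // Full univariate Gaussian density. normal_exp in the diff above drops
    // the class-independent 1/sqrt(2*pi) factor, which cancels in the argmax.
    double gaussian_pdf(double x, double mean, double variance)
    {
        return std::exp(-(x - mean) * (x - mean) / (2.0 * variance))
             / std::sqrt(2.0 * M_PI * variance);
    }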
