Skip to content

Commit

Permalink
minor changes; kde method commented
Browse files Browse the repository at this point in the history
  • Loading branch information
mazumdarparijat committed Jul 20, 2014
1 parent eed5373 commit b972fb5
Show file tree
Hide file tree
Showing 4 changed files with 55 additions and 43 deletions.
10 changes: 5 additions & 5 deletions src/shogun/multiclass/tree/BallTree.cpp
Expand Up @@ -92,12 +92,12 @@ void CBallTree::init_node(bnode_t* node, index_t start, index_t end)
SGVector<float64_t> center(m_data.num_rows);
for (int32_t i=0;i<m_data.num_rows;i++)
{
center[i]=m_data(i,vec_id[start]);
upper_bounds[i]=m_data(i,vec_id[start]);
lower_bounds[i]=m_data(i,vec_id[start]);
center[i]=m_data(i,m_vec_id[start]);
upper_bounds[i]=m_data(i,m_vec_id[start]);
lower_bounds[i]=m_data(i,m_vec_id[start]);
for (int32_t j=start+1;j<=end;j++)
{
float64_t data_pt=m_data(i,vec_id[j]);
float64_t data_pt=m_data(i,m_vec_id[j]);
upper_bounds[i]=CMath::max(upper_bounds[i],data_pt);
lower_bounds[i]=CMath::min(lower_bounds[i],data_pt);
center[i]+=data_pt;
Expand All @@ -108,7 +108,7 @@ void CBallTree::init_node(bnode_t* node, index_t start, index_t end)

float64_t radius=0;
for (int32_t i=start;i<=end;i++)
radius=CMath::max(distance(vec_id[i],center.vector,center.vlen),radius);
radius=CMath::max(distance(m_vec_id[i],center.vector,center.vlen),radius);

node->data.radius=radius;
node->data.center=center;
Expand Down
8 changes: 4 additions & 4 deletions src/shogun/multiclass/tree/KDTree.cpp
Expand Up @@ -111,12 +111,12 @@ void CKDTree::init_node(bnode_t* node, index_t start, index_t end)

for (int32_t i=0;i<m_data.num_rows;i++)
{
upper_bounds[i]=m_data(i,vec_id[start]);
lower_bounds[i]=m_data(i,vec_id[start]);
upper_bounds[i]=m_data(i,m_vec_id[start]);
lower_bounds[i]=m_data(i,m_vec_id[start]);
for (int32_t j=start+1;j<=end;j++)
{
upper_bounds[i]=CMath::max(upper_bounds[i],m_data(i,vec_id[j]));
lower_bounds[i]=CMath::min(lower_bounds[i],m_data(i,vec_id[j]));
upper_bounds[i]=CMath::max(upper_bounds[i],m_data(i,m_vec_id[j]));
lower_bounds[i]=CMath::min(lower_bounds[i],m_data(i,m_vec_id[j]));
}
}

Expand Down
70 changes: 41 additions & 29 deletions src/shogun/multiclass/tree/NbodyTree.cpp
Expand Up @@ -47,11 +47,11 @@ void CNbodyTree::build_tree(CDenseFeatures<float64_t>* data)
REQUIRE(data,"data not set\n");
REQUIRE(m_leaf_size>0,"Leaf size should be greater than 0\n");

knn_done=false;
m_knn_done=false;
m_data=data->get_feature_matrix();

vec_id=SGVector<index_t>(m_data.num_cols);
vec_id.range_fill(0);
m_vec_id=SGVector<index_t>(m_data.num_cols);
m_vec_id.range_fill(0);

set_root(recursive_build(0,m_data.num_cols-1));
}
Expand All @@ -61,10 +61,10 @@ void CNbodyTree::query_knn(CDenseFeatures<float64_t>* data, int32_t k)
REQUIRE(data,"Query data not supplied\n")
REQUIRE(data->get_num_features()==m_data.num_rows,"query data dimension should be same as training data dimension\n")

knn_done=true;
m_knn_done=true;
SGMatrix<float64_t> qfeats=data->get_feature_matrix();
knn_dists=SGMatrix<float64_t>(k,qfeats.num_cols);
knn_indices=SGMatrix<index_t>(k,qfeats.num_cols);
m_knn_dists=SGMatrix<float64_t>(k,qfeats.num_cols);
m_knn_indices=SGMatrix<index_t>(k,qfeats.num_cols);
int32_t dim=qfeats.num_rows;

for (int32_t i=0;i<qfeats.num_cols;i++)
Expand All @@ -76,8 +76,8 @@ void CNbodyTree::query_knn(CDenseFeatures<float64_t>* data, int32_t k)

float64_t mdist=min_dist(root,qfeats.matrix+i*dim,dim);
query_knn_single(heap,mdist,root,qfeats.matrix+i*dim,dim);
memcpy(knn_dists.matrix+i*k,heap->get_dists(),k*sizeof(float64_t));
memcpy(knn_indices.matrix+i*k,heap->get_indices(),k*sizeof(index_t));
memcpy(m_knn_dists.matrix+i*k,heap->get_dists(),k*sizeof(float64_t));
memcpy(m_knn_indices.matrix+i*k,heap->get_indices(),k*sizeof(index_t));

delete(heap);
}
Expand Down Expand Up @@ -145,17 +145,17 @@ SGVector<float64_t> CNbodyTree::log_kernel_density_dual(SGMatrix<float64_t> test

SGMatrix<float64_t> CNbodyTree::get_knn_dists()
{
if (knn_done)
return knn_dists;
if (m_knn_done)
return m_knn_dists;

SG_ERROR("knn query has not been executed yet\n");
return SGMatrix<float64_t>();
}

SGMatrix<index_t> CNbodyTree::get_knn_indices()
{
if (knn_done)
return knn_indices;
if (m_knn_done)
return m_knn_indices;

SG_ERROR("knn query has not been executed yet\n");
return SGMatrix<index_t>();
Expand All @@ -172,7 +172,7 @@ void CNbodyTree::query_knn_single(CKNNHeap* heap, float64_t mdist, bnode_t* node
index_t end=node->data.end_idx;

for (int32_t i=start;i<=end;i++)
heap->push(vec_id[i],distance(vec_id[i],arr,dim));
heap->push(m_vec_id[i],distance(m_vec_id[i],arr,dim));

return;
}
Expand Down Expand Up @@ -255,7 +255,7 @@ void CNbodyTree::get_kde_single(bnode_t* node,float64_t* data, EKernelType kerne

for (int32_t i=node->data.start_idx;i<=node->data.end_idx;i++)
{
float64_t pt_eval=CKernelDensity::log_kernel(kernel,distance(vec_id[i],data,m_data.num_rows),h);
float64_t pt_eval=CKernelDensity::log_kernel(kernel,distance(m_vec_id[i],data,m_data.num_rows),h);
min_bound_global=logsumexp(pt_eval,min_bound_global);
}

Expand Down Expand Up @@ -306,6 +306,7 @@ void CNbodyTree::kde_dual(bnode_t* refnode, bnode_t* querynode, SGVector<index_t
// global bound criterion met || local bound criterion met
if (global_criterion || local_criterion)
{
// log density of all query points in the node is increased by K(mean + spread/2)
float64_t center_density=logsumexp(min_bound_node,spread_node-CMath::log(2))-CMath::log(querynode->data.end_idx-querynode->data.start_idx+1);
for (int32_t i=querynode->data.start_idx;i<=querynode->data.end_idx;i++)
log_density[qid[i]]=logsumexp(log_density[qid[i]],center_density);
Expand All @@ -319,12 +320,13 @@ void CNbodyTree::kde_dual(bnode_t* refnode, bnode_t* querynode, SGVector<index_t
min_bound_global=logdiffexp(min_bound_global,min_bound_node);
spread_global=logdiffexp(spread_global,spread_node);

// point by point evavuation of density
for (int32_t i=querynode->data.start_idx;i<=querynode->data.end_idx;i++)
{
float64_t q=-CMath::INFTY;
for (int32_t j=refnode->data.start_idx;j<=refnode->data.end_idx;j++)
{
float64_t pt_eval=CKernelDensity::log_kernel(kernel_type,distance(vec_id[j],qdata.matrix+dim*qid[i],dim),h);
float64_t pt_eval=CKernelDensity::log_kernel(kernel_type,distance(m_vec_id[j],qdata.matrix+dim*qid[i],dim),h);
q=logsumexp(q,pt_eval);
}

Expand All @@ -335,22 +337,23 @@ void CNbodyTree::kde_dual(bnode_t* refnode, bnode_t* querynode, SGVector<index_t
return;
}

// if query node is leaf
// if query node is leaf - just recurse on the reference tree
if (querynode->data.is_leaf)
{
bnode_t* lchild=refnode->left();
bnode_t* rchild=refnode->right();
int32_t queryn=querynode->data.end_idx-querynode->data.start_idx+1;

// compute bounds for query node and left child of ref node
float64_t lower_dist=min_dist_dual(querynode,lchild);
float64_t upper_dist=max_dist_dual(querynode,lchild);
int32_t refn_l=lchild->data.end_idx-lchild->data.start_idx+1;
float64_t lower_bound_childl=CMath::log(queryn)+CMath::log(refn_l)+CKernelDensity::log_kernel(kernel_type,upper_dist,h);
float64_t spread_childl=logdiffexp(CMath::log(queryn)+CMath::log(refn_l)+CKernelDensity::log_kernel(kernel_type,lower_dist,h),lower_bound_childl);

// compute bounds for query node and right child of ref node
lower_dist=min_dist_dual(querynode,rchild);
upper_dist=max_dist_dual(querynode,rchild);

int32_t refn_r=rchild->data.end_idx-rchild->data.start_idx+1;
float64_t lower_bound_childr=CMath::log(queryn)+CMath::log(refn_r)+CKernelDensity::log_kernel(kernel_type,upper_dist,h);
float64_t spread_childr=logdiffexp(CMath::log(queryn)+CMath::log(refn_r)+CKernelDensity::log_kernel(kernel_type,lower_dist,h),lower_bound_childr);
Expand All @@ -372,7 +375,7 @@ void CNbodyTree::kde_dual(bnode_t* refnode, bnode_t* querynode, SGVector<index_t
return;
}

// if reference node is leaf
// if reference node is leaf - just recurse on the query tree
if (refnode->data.is_leaf)
{
int32_t ref_n=refnode->data.end_idx-refnode->data.start_idx+1;
Expand All @@ -382,11 +385,13 @@ void CNbodyTree::kde_dual(bnode_t* refnode, bnode_t* querynode, SGVector<index_t
int32_t query_nl=lchild->data.end_idx-lchild->data.start_idx+1;
int32_t query_nr=rchild->data.end_idx-rchild->data.start_idx+1;

// compute bounds for left child of query node and ref node
float64_t lower_dist=min_dist_dual(refnode,lchild);
float64_t upper_dist=max_dist_dual(refnode,lchild);
float64_t lower_bound_childl=CMath::log(query_nl)+CMath::log(ref_n)+CKernelDensity::log_kernel(kernel_type,upper_dist,h);
float64_t spread_childl=logdiffexp(CMath::log(query_nl)+CMath::log(ref_n)+CKernelDensity::log_kernel(kernel_type,lower_dist,h),lower_bound_childl);

// compute bounds for right child of query node and ref node
lower_dist=min_dist_dual(querynode,rchild);
upper_dist=max_dist_dual(querynode,rchild);
float64_t lower_bound_childr=CMath::log(query_nr)+CMath::log(ref_n)+CKernelDensity::log_kernel(kernel_type,upper_dist,h);
Expand All @@ -409,6 +414,7 @@ void CNbodyTree::kde_dual(bnode_t* refnode, bnode_t* querynode, SGVector<index_t
return;
}

// if none of above - apply 4 way recursion in both trees: left-left, left-right, right-left, right-right
bnode_t* refchildl=refnode->left();
bnode_t* refchildr=refnode->right();
bnode_t* querychildl=querynode->left();
Expand All @@ -419,21 +425,25 @@ void CNbodyTree::kde_dual(bnode_t* refnode, bnode_t* querynode, SGVector<index_t
float64_t queryn_l=querychildl->data.end_idx-querychildl->data.start_idx+1;
float64_t queryn_r=querychildr->data.end_idx-querychildr->data.start_idx+1;

// left child-left child bounds
float64_t lower_dist=min_dist_dual(querychildl,refchildl);
float64_t upper_dist=max_dist_dual(querychildl,refchildl);
float64_t lower_bound_ll=CMath::log(queryn_l)+CMath::log(refn_l)+CKernelDensity::log_kernel(kernel_type,upper_dist,h);
float64_t spread_ll=logdiffexp(CMath::log(queryn_l)+CMath::log(refn_l)+CKernelDensity::log_kernel(kernel_type,lower_dist,h),lower_bound_ll);

// left-right bounds
lower_dist=min_dist_dual(querychildl,refchildr);
upper_dist=max_dist_dual(querychildl,refchildr);
float64_t lower_bound_lr=CMath::log(queryn_l)+CMath::log(refn_r)+CKernelDensity::log_kernel(kernel_type,upper_dist,h);
float64_t spread_lr=logdiffexp(CMath::log(queryn_l)+CMath::log(refn_r)+CKernelDensity::log_kernel(kernel_type,lower_dist,h),lower_bound_lr);

// right-left bounds
lower_dist=min_dist_dual(querychildr,refchildl);
upper_dist=max_dist_dual(querychildr,refchildl);
float64_t lower_bound_rl=CMath::log(queryn_r)+CMath::log(refn_l)+CKernelDensity::log_kernel(kernel_type,upper_dist,h);
float64_t spread_rl=logdiffexp(CMath::log(queryn_r)+CMath::log(refn_l)+CKernelDensity::log_kernel(kernel_type,lower_dist,h),lower_bound_rl);

// right-right bounds
lower_dist=min_dist_dual(querychildr,refchildr);
upper_dist=max_dist_dual(querychildr,refchildr);
float64_t lower_bound_rr=CMath::log(queryn_r)+CMath::log(refn_r)+CKernelDensity::log_kernel(kernel_type,upper_dist,h);
Expand All @@ -452,9 +462,11 @@ void CNbodyTree::kde_dual(bnode_t* refnode, bnode_t* querynode, SGVector<index_t
spread_global=logsumexp(spread_global,spread_rl);
spread_global=logsumexp(spread_global,spread_rr);

// left-left and left-right recursions
kde_dual(refchildl,querychildl,qid,qdata,log_density,kernel_type,h,log_atol,log_rtol,log_norm,lower_bound_ll,spread_ll, min_bound_global,spread_global);
kde_dual(refchildr,querychildl,qid,qdata,log_density,kernel_type,h,log_atol,log_rtol,log_norm,lower_bound_lr,spread_lr, min_bound_global,spread_global);

// right-left and right-right recursions
kde_dual(refchildl,querychildr,qid,qdata,log_density,kernel_type,h,log_atol,log_rtol,log_norm,lower_bound_rl,spread_rl, min_bound_global,spread_global);
kde_dual(refchildr,querychildr,qid,qdata,log_density,kernel_type,h,log_atol,log_rtol,log_norm,lower_bound_rr,spread_rr, min_bound_global, spread_global);

Expand All @@ -474,14 +486,14 @@ void CNbodyTree::partition(index_t dim, index_t start, index_t end, index_t mid)
index_t midindex=left;
for (int32_t i=left;i<right;i++)
{
if (m_data(dim,vec_id[i])<m_data(dim,vec_id[right]))
if (m_data(dim,m_vec_id[i])<m_data(dim,m_vec_id[right]))
{
CMath::swap(*(vec_id.vector+i),*(vec_id.vector+midindex));
CMath::swap(*(m_vec_id.vector+i),*(m_vec_id.vector+midindex));
midindex+=1;
}
}

CMath::swap(*(vec_id.vector+midindex),*(vec_id.vector+right));
CMath::swap(*(m_vec_id.vector+midindex),*(m_vec_id.vector+right));
if (midindex==mid)
break;
else if (midindex<mid)
Expand Down Expand Up @@ -515,16 +527,16 @@ void CNbodyTree::init()
{
m_data=SGMatrix<float64_t>();
m_leaf_size=1;
vec_id=SGVector<index_t>();
m_vec_id=SGVector<index_t>();
m_dist=D_EUCLIDEAN;
knn_done=false;
knn_dists=SGMatrix<float64_t>();
knn_indices=SGMatrix<index_t>();
m_knn_done=false;
m_knn_dists=SGMatrix<float64_t>();
m_knn_indices=SGMatrix<index_t>();

SG_ADD(&m_data,"m_data","data matrix",MS_NOT_AVAILABLE);
SG_ADD(&m_leaf_size,"m_leaf_size","leaf size",MS_NOT_AVAILABLE);
SG_ADD(&vec_id,"vec_id","id of vectors",MS_NOT_AVAILABLE);
SG_ADD(&knn_done,"knn_done","knn done or not",MS_NOT_AVAILABLE);
SG_ADD(&knn_dists,"knn_dists","knn distances",MS_NOT_AVAILABLE);
SG_ADD(&knn_indices,"knn_indices","knn indices",MS_NOT_AVAILABLE);
SG_ADD(&m_vec_id,"m_vec_id","id of vectors",MS_NOT_AVAILABLE);
SG_ADD(&m_knn_done,"knn_done","knn done or not",MS_NOT_AVAILABLE);
SG_ADD(&m_knn_dists,"m_knn_dists","knn distances",MS_NOT_AVAILABLE);
SG_ADD(&m_knn_indices,"knn_indices","knn indices",MS_NOT_AVAILABLE);
}
10 changes: 5 additions & 5 deletions src/shogun/multiclass/tree/NbodyTree.h
Expand Up @@ -69,7 +69,7 @@ class CNbodyTree : public CTreeMachine<NbodyTreeNodeData>
/** get final rearranged vector indices
* @return vector indices rearranged corresponding to the built tree
*/
SGVector<index_t> get_rearranged_vector_ids() const { return vec_id; }
SGVector<index_t> get_rearranged_vector_ids() const { return m_vec_id; }

/** build tree
*
Expand Down Expand Up @@ -314,7 +314,7 @@ class CNbodyTree : public CTreeMachine<NbodyTreeNodeData>
SGMatrix<float64_t> m_data;

/** vector id */
SGVector<index_t> vec_id;
SGVector<index_t> m_vec_id;

private:
/** leaf size */
Expand All @@ -324,13 +324,13 @@ class CNbodyTree : public CTreeMachine<NbodyTreeNodeData>
EDistanceType m_dist;

/** knn query done or not */
bool knn_done;
bool m_knn_done;

/** knn distances */
SGMatrix<float64_t> knn_dists;
SGMatrix<float64_t> m_knn_dists;

/** knn indices */
SGMatrix<index_t> knn_indices;
SGMatrix<index_t> m_knn_indices;
};
} /* namespace shogun */

Expand Down

0 comments on commit b972fb5

Please sign in to comment.