Skip to content

Commit

Permalink
Merge pull request #3662 from abhinavrai44/distance
Browse files Browse the repository at this point in the history
Port DistanceMachine.cpp to use OpenMP
  • Loading branch information
vigsterkr committed Mar 14, 2017
2 parents 35d9c9d + 0ec785c commit 7a5305f
Show file tree
Hide file tree
Showing 5 changed files with 87 additions and 179 deletions.
12 changes: 12 additions & 0 deletions src/shogun/distance/Distance.cpp
Expand Up @@ -216,6 +216,18 @@ float64_t CDistance::distance(int32_t idx_a, int32_t idx_b)
return compute(idx_a, idx_b);
}

void CDistance::run_distance_rhs(SGVector<float64_t>& result, const index_t idx_r_start, index_t idx_start, const index_t idx_stop, const index_t idx_a)
{
for(index_t i=idx_r_start; idx_start < idx_stop; ++i,++idx_start)
result.vector[i] = this->distance(idx_a,idx_start);
}

void CDistance::run_distance_lhs(SGVector<float64_t>& result, const index_t idx_r_start, index_t idx_start, const index_t idx_stop, const index_t idx_b)
{
for(index_t i=idx_r_start; idx_start < idx_stop; ++i,++idx_start)
result.vector[i] = this->distance(idx_start,idx_b);
}

void CDistance::do_precompute_matrix()
{
int32_t num_left=lhs->get_num_vectors();
Expand Down
22 changes: 22 additions & 0 deletions src/shogun/distance/Distance.h
Expand Up @@ -335,6 +335,28 @@ class CDistance : public CSGObject
return lhs==rhs;
}

/**
* Function for computing distance values
*
* @param result array of distance values
* @param idx_r_start iteration start value
* @param idx_start start
* @param idx_stop iteration end value
* @param idx_a feature vector a at idx_a
*/
void run_distance_rhs(SGVector<float64_t>& result, const index_t idx_r_start, index_t idx_start, const index_t idx_stop, const index_t idx_a);

/**
* Function for computing distance values
*
* @param result array of distance values
* @param idx_r_start iteration start value
* @param idx_start start
* @param idx_stop iteration end value
* @param idx_b feature vector b at idx_b
*/
void run_distance_lhs(SGVector<float64_t>& result, const index_t idx_r_start, index_t idx_start, const index_t idx_stop, const index_t idx_b);

protected:

/// run distance thread
Expand Down
208 changes: 48 additions & 160 deletions src/shogun/machine/DistanceMachine.cpp
Expand Up @@ -12,19 +12,11 @@
#include <shogun/distance/Distance.h>
#include <shogun/base/Parameter.h>

using namespace shogun;
#ifdef HAVE_OPENMP
#include <omp.h>
#endif

#ifndef DOXYGEN_SHOULD_SKIP_THIS
struct D_THREAD_PARAM
{
CDistance* d;
float64_t* r;
int32_t idx_r_start;
int32_t idx_start;
int32_t idx_stop;
int32_t idx_comp;
};
#endif // DOXYGEN_SHOULD_SKIP_THIS
using namespace shogun;

CDistanceMachine::CDistanceMachine()
: CMachine()
Expand All @@ -49,170 +41,68 @@ void CDistanceMachine::init()
m_parameters->add((CSGObject**)&distance, "distance", "Distance to use");
}

void CDistanceMachine::distances_lhs(float64_t* result,int32_t idx_a1,int32_t idx_a2,int32_t idx_b)
void CDistanceMachine::distances_lhs(SGVector<float64_t>& result, index_t idx_a1, index_t idx_a2, index_t idx_b)
{
// TODO: port to use OpenMP backend instead of pthread
#ifdef HAVE_PTHREAD
int32_t num_threads=parallel->get_num_threads();
#else
int32_t num_threads=1;
#endif
ASSERT(num_threads>0)
int32_t num_threads;
int32_t num_vec;
int32_t step;

ASSERT(result)

if (num_threads < 2)
#pragma omp parallel shared(num_threads, step)
{
D_THREAD_PARAM param;
param.d=distance;
param.r=result;
param.idx_r_start=idx_a1;
param.idx_start=idx_a1;
param.idx_stop=idx_a2+1;
param.idx_comp=idx_b;

run_distance_thread_lhs((void*) &param);
}
#ifdef HAVE_PTHREAD
else
{
pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
D_THREAD_PARAM* params = SG_MALLOC(D_THREAD_PARAM, num_threads);
int32_t num_vec=idx_a2-idx_a1+1;
int32_t step= num_vec/num_threads;
int32_t t;

pthread_attr_t attr;
pthread_attr_init(&attr);
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);

for (t=0; t<num_threads-1; t++)
#ifdef HAVE_OPENMP
#pragma omp single
{
params[t].d = distance;
params[t].r = result;
params[t].idx_r_start=t*step;
params[t].idx_start = (t*step)+idx_a1;
params[t].idx_stop = ((t+1)*step)+idx_a1;
params[t].idx_comp=idx_b;

pthread_create(&threads[t], &attr, CDistanceMachine::run_distance_thread_lhs, (void*)&params[t]);
num_threads=omp_get_num_threads();
num_vec=idx_a2-idx_a1+1;
step=num_vec/num_threads;
}
params[t].d = distance;
params[t].r = result;
params[t].idx_r_start=t*step;
params[t].idx_start = (t*step)+idx_a1;
params[t].idx_stop = idx_a2+1;
params[t].idx_comp=idx_b;

run_distance_thread_lhs(&params[t]);

for (t=0; t<num_threads-1; t++)
pthread_join(threads[t], NULL);
int32_t thread_num=omp_get_thread_num();

pthread_attr_destroy(&attr);
SG_FREE(params);
SG_FREE(threads);
}
index_t idx_r_start = thread_num * step;
index_t idx_start = (thread_num * step) + idx_a1;
index_t idx_stop = (thread_num==(num_threads - 1)) ? (idx_a2 + 1) : ((thread_num + 1) * step) + idx_a1;
distance->run_distance_lhs(result, idx_r_start, idx_start, idx_stop, idx_b);
#else
index_t idx_r_start = idx_a1;
index_t idx_start = idx_a1;
index_t idx_stop = idx_a2 + 1;
distance->run_distance_lhs(result, idx_r_start, idx_start, idx_stop, idx_b);
#endif
}
}

void CDistanceMachine::distances_rhs(float64_t* result,int32_t idx_b1,int32_t idx_b2,int32_t idx_a)
void CDistanceMachine::distances_rhs(SGVector<float64_t>& result, index_t idx_b1, index_t idx_b2, index_t idx_a)
{
// TODO: port to use OpenMP backend instead of pthread
#ifdef HAVE_PTHREAD
int32_t num_threads=parallel->get_num_threads();
#else
int32_t num_threads=1;
#endif
ASSERT(num_threads>0)
int32_t num_threads;
int32_t num_vec;
int32_t step;

ASSERT(result)

if (num_threads < 2)
#pragma omp parallel shared(num_threads, step)
{
D_THREAD_PARAM param;
param.d=distance;
param.r=result;
param.idx_r_start=idx_b1;
param.idx_start=idx_b1;
param.idx_stop=idx_b2+1;
param.idx_comp=idx_a;

run_distance_thread_rhs((void*) &param);
}
#ifdef HAVE_PTHREAD
else
{
pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
D_THREAD_PARAM* params = SG_MALLOC(D_THREAD_PARAM, num_threads);
int32_t num_vec=idx_b2-idx_b1+1;
int32_t step= num_vec/num_threads;
int32_t t;

pthread_attr_t attr;
pthread_attr_init(&attr);
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);

for (t=0; t<num_threads-1; t++)
#ifdef HAVE_OPENMP
#pragma omp single
{
params[t].d = distance;
params[t].r = result;
params[t].idx_r_start=t*step;
params[t].idx_start = (t*step)+idx_b1;
params[t].idx_stop = ((t+1)*step)+idx_b1;
params[t].idx_comp=idx_a;

pthread_create(&threads[t], &attr, CDistanceMachine::run_distance_thread_rhs, (void*)&params[t]);
num_threads=omp_get_num_threads();
num_vec=idx_b2-idx_b1+1;
step=num_vec/num_threads;
}
params[t].d = distance;
params[t].r = result;
params[t].idx_r_start=t*step;
params[t].idx_start = (t*step)+idx_b1;
params[t].idx_stop = idx_b2+1;
params[t].idx_comp=idx_a;

run_distance_thread_rhs(&params[t]);
int32_t thread_num=omp_get_thread_num();

for (t=0; t<num_threads-1; t++)
pthread_join(threads[t], NULL);

pthread_attr_destroy(&attr);
SG_FREE(params);
SG_FREE(threads);
}
index_t idx_r_start = thread_num * step;
index_t idx_start = (thread_num * step) + idx_b1;
index_t idx_stop = (thread_num==(num_threads - 1)) ? (idx_b2 + 1) : ((thread_num + 1) * step) + idx_b1;
distance->run_distance_rhs(result, idx_r_start, idx_start, idx_stop, idx_a);
#else
index_t idx_r_start = idx_b1;
index_t idx_start = idx_b1;
index_t idx_stop = idx_b2 + 1;
distance->run_distance_rhs(result, idx_r_start, idx_start, idx_stop, idx_a);
#endif
}

void* CDistanceMachine::run_distance_thread_lhs(void* p)
{
D_THREAD_PARAM* params= (D_THREAD_PARAM*) p;
CDistance* distance=params->d;
float64_t* res=params->r;
int32_t idx_res_start=params->idx_r_start;
int32_t idx_act=params->idx_start;
int32_t idx_stop=params->idx_stop;
int32_t idx_c=params->idx_comp;

for (int32_t i=idx_res_start; idx_act<idx_stop; i++,idx_act++)
res[i] =distance->distance(idx_act,idx_c);

return NULL;
}

void* CDistanceMachine::run_distance_thread_rhs(void* p)
{
D_THREAD_PARAM* params= (D_THREAD_PARAM*) p;
CDistance* distance=params->d;
float64_t* res=params->r;
int32_t idx_res_start=params->idx_r_start;
int32_t idx_act=params->idx_start;
int32_t idx_stop=params->idx_stop;
int32_t idx_c=params->idx_comp;

for (int32_t i=idx_res_start; idx_act<idx_stop; i++,idx_act++)
res[i] =distance->distance(idx_c,idx_act);

return NULL;
}
}

CMulticlassLabels* CDistanceMachine::apply_multiclass(CFeatures* data)
Expand Down Expand Up @@ -249,11 +139,11 @@ float64_t CDistanceMachine::apply_one(int32_t num)
SG_UNREF(lhs);

/* (multiple threads) calculate distances to all cluster centers */
float64_t* dists=SG_MALLOC(float64_t, num_clusters);
SGVector<float64_t> dists(num_clusters);
distances_lhs(dists, 0, num_clusters-1, num);

/* find cluster index with smallest distance */
float64_t result=dists[0];
float64_t result=dists.vector[0];
index_t best_index=0;
for (index_t i=1; i<num_clusters; ++i)
{
Expand All @@ -264,8 +154,6 @@ float64_t CDistanceMachine::apply_one(int32_t num)
}
}

SG_FREE(dists);

/* implicit cast */
return best_index;
}
Expand Down
18 changes: 2 additions & 16 deletions src/shogun/machine/DistanceMachine.h
Expand Up @@ -58,7 +58,7 @@ class CDistanceMachine : public CMachine
* @param idx_a2 last feature vector a2 at idx_a2
* @param idx_b feature vector b at idx_b
*/
void distances_lhs(float64_t* result,int32_t idx_a1,int32_t idx_a2,int32_t idx_b);
void distances_lhs(SGVector<float64_t>& result, int32_t idx_a1, int32_t idx_a2, int32_t idx_b);

/**
* get distance functions for rhs feature vectors
Expand All @@ -69,7 +69,7 @@ class CDistanceMachine : public CMachine
* @param idx_b2 last feature vector a2 at idx_b2
* @param idx_a feature vector a at idx_a
*/
void distances_rhs(float64_t* result,int32_t idx_b1,int32_t idx_b2,int32_t idx_a);
void distances_rhs(SGVector<float64_t>& result, int32_t idx_b1, int32_t idx_b2, int32_t idx_a);

/** Returns the name of the SGSerializable instance. It MUST BE
* the CLASS NAME without the prefixed `C'.
Expand Down Expand Up @@ -108,20 +108,6 @@ class CDistanceMachine : public CMachine
*/
virtual void store_model_features();

/**
* thread function for computing distance values
*
* @param p thread parameter
*/
static void* run_distance_thread_lhs(void* p);

/**
* thread function for computing distance values
*
* @param p thread parameter
*/
static void* run_distance_thread_rhs(void* p);

private:
void init();

Expand Down
6 changes: 3 additions & 3 deletions src/shogun/multiclass/KNN.cpp
Expand Up @@ -218,14 +218,14 @@ CMulticlassLabels* CKNN::classify_NN()

// assuming 0th train examples as nearest to i-th test example
int32_t out_idx = 0;
float64_t min_dist = distances[0];
float64_t min_dist = distances.vector[0];

// searching for nearest neighbor by comparing distances
for (j=0; j<m_train_labels.vlen; j++)
{
if (distances[j]<min_dist)
if (distances.vector[j]<min_dist)
{
min_dist = distances[j];
min_dist = distances.vector[j];
out_idx = j;
}
}
Expand Down

0 comments on commit 7a5305f

Please sign in to comment.