Merge pull request #3322 from Saurabh7/multirf

Parallel RF

karlnapf committed Jul 4, 2016
2 parents 08da61f + ed8e439 commit 2d69784

Showing 12 changed files with 149 additions and 12 deletions.
15 changes: 15 additions & 0 deletions src/shogun/features/DenseFeatures.cpp
@@ -636,6 +636,21 @@ CFeatures* CDenseFeatures<ST>::copy_dimension_subset(SGVector<index_t> dims)
return result;
}

template<class ST>
CFeatures* CDenseFeatures<ST>::shallow_subset_copy()
{
CFeatures* shallow_copy_features=nullptr;

SG_SDEBUG("Using underlying feature matrix with %d dimensions and %d feature vectors!\n", num_features, num_vectors);
SGMatrix<ST> shallow_copy_matrix(feature_matrix);
shallow_copy_features=new CDenseFeatures<ST>(shallow_copy_matrix);
SG_REF(shallow_copy_features);
if (m_subset_stack->has_subsets())
shallow_copy_features->add_subset(m_subset_stack->get_last_subset()->get_subset_idx());

return shallow_copy_features;
}

template<class ST> ST* CDenseFeatures<ST>::compute_feature_vector(int32_t num, int32_t& len,
ST* target)
{
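For context, a minimal usage sketch of the new method (not part of the commit itself): it mirrors the unit test added further down in this PR and assumes the usual Shogun init/exit helpers and header layout.

#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <numeric>

using namespace shogun;

int main()
{
	init_shogun_with_defaults();

	// 5-dimensional features, 10 vectors, filled with 1..50
	SGMatrix<float64_t> data(5, 10);
	std::iota(data.data(), data.data()+data.size(), 1);

	CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(data);

	// restrict the view to a random half of the vectors ...
	SGVector<index_t> inds(5);
	inds.random(0, 9);
	features->add_subset(inds);

	// ... and take a shallow copy: the feature matrix is shared,
	// but the copy carries its own subset stack
	CFeatures* copy=features->shallow_subset_copy();

	SG_UNREF(copy);
	SG_UNREF(features);
	exit_shogun();
	return 0;
}
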
4 changes: 4 additions & 0 deletions src/shogun/features/DenseFeatures.h
@@ -515,6 +515,10 @@ template<class ST> class CDenseFeatures: public CDotFeatures
*/
static CDenseFeatures* obtain_from_generic(CFeatures* const base_features);

#ifndef SWIG // SWIG should skip this part
virtual CFeatures* shallow_subset_copy();
#endif

/** @return object name */
virtual const char* get_name() const { return "DenseFeatures"; }

9 changes: 9 additions & 0 deletions src/shogun/features/Features.h
@@ -331,6 +331,15 @@ class CFeatures : public CSGObject
* @return whether this class supports compatible computation
*/
virtual bool get_feature_class_compatibility (EFeatureClass rhs) const;

#ifndef SWIG // SWIG should skip this part
virtual CFeatures* shallow_subset_copy()
{
SG_SNOTIMPLEMENTED;
return NULL;
}
#endif

private:
void init();

14 changes: 14 additions & 0 deletions src/shogun/labels/BinaryLabels.cpp
@@ -138,3 +138,17 @@ void CBinaryLabels::scores_to_probabilities(float64_t a, float64_t b)

SG_DEBUG("leaving CBinaryLabels::scores_to_probabilities()\n")
}

CLabels* CBinaryLabels::shallow_subset_copy()
{
CLabels* shallow_copy_labels=nullptr;
SGVector<float64_t> shallow_copy_vector(m_labels);
shallow_copy_labels=new CBinaryLabels(m_labels.size());
SG_REF(shallow_copy_labels);

((CDenseLabels*) shallow_copy_labels)->set_labels(shallow_copy_vector);
if (m_subset_stack->has_subsets())
shallow_copy_labels->add_subset(m_subset_stack->get_last_subset()->get_subset_idx());

return shallow_copy_labels;
}
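As with the features above, a brief usage sketch of the labels variant (not part of the commit; CMulticlassLabels and CRegressionLabels below follow the same pattern). Names and values here are illustrative only.

#include <shogun/labels/BinaryLabels.h>

using namespace shogun;

void shallow_labels_example()
{
	// ten alternating +/-1 labels
	SGVector<float64_t> y(10);
	for (index_t i=0; i<y.vlen; ++i)
		y[i]=(i%2==0) ? 1.0 : -1.0;

	CBinaryLabels* labels=new CBinaryLabels(y);
	SG_REF(labels);

	// restrict to a random subset, then copy: the label vector is shared,
	// but the copy owns its subset stack, so a worker thread can
	// add/remove subsets without touching 'labels'
	SGVector<index_t> inds(5);
	inds.random(0, 9);
	labels->add_subset(inds);
	CLabels* copy=labels->shallow_subset_copy();

	SG_UNREF(copy);
	SG_UNREF(labels);
}
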
4 changes: 4 additions & 0 deletions src/shogun/labels/BinaryLabels.h
@@ -116,6 +116,10 @@ class CBinaryLabels : public CDenseLabels
{
return "BinaryLabels";
}
#ifndef SWIG // SWIG should skip this part
virtual CLabels* shallow_subset_copy();
#endif

};
}
#endif
8 changes: 8 additions & 0 deletions src/shogun/labels/Labels.h
@@ -123,6 +123,14 @@ class CLabels : public CSGObject
*/
virtual SGVector<float64_t> get_values();

#ifndef SWIG // SWIG should skip this part
virtual CLabels* shallow_subset_copy()
{
SG_SNOTIMPLEMENTED;
return NULL;
}
#endif

private:
void init();

13 changes: 13 additions & 0 deletions src/shogun/labels/MulticlassLabels.cpp
@@ -134,3 +134,16 @@ int32_t CMulticlassLabels::get_num_classes()
SGVector<float64_t> unique=get_unique_labels();
return unique.vlen;
}

CLabels* CMulticlassLabels::shallow_subset_copy()
{
CLabels* shallow_copy_labels=nullptr;
SGVector<float64_t> shallow_copy_vector(m_labels);
shallow_copy_labels=new CMulticlassLabels(m_labels.size());
SG_REF(shallow_copy_labels);
((CDenseLabels*) shallow_copy_labels)->set_labels(shallow_copy_vector);
if (m_subset_stack->has_subsets())
shallow_copy_labels->add_subset(m_subset_stack->get_last_subset()->get_subset_idx());

return shallow_copy_labels;
}
3 changes: 3 additions & 0 deletions src/shogun/labels/MulticlassLabels.h
@@ -123,6 +123,9 @@ class CMulticlassLabels : public CDenseLabels

/** @return object name */
virtual const char* get_name() const { return "MulticlassLabels"; }
#ifndef SWIG // SWIG should skip this part
virtual CLabels* shallow_subset_copy();
#endif

private:
/** initialises and register parameters */
13 changes: 13 additions & 0 deletions src/shogun/labels/RegressionLabels.cpp
@@ -25,3 +25,16 @@ ELabelType CRegressionLabels::get_label_type() const
return LT_REGRESSION;
}

CLabels* CRegressionLabels::shallow_subset_copy()
{
CLabels* shallow_copy_labels=nullptr;
SGVector<float64_t> shallow_copy_vector(m_labels);
shallow_copy_labels=new CRegressionLabels(m_labels.size());
SG_REF(shallow_copy_labels);

((CDenseLabels*) shallow_copy_labels)->set_labels(shallow_copy_vector);
if (m_subset_stack->has_subsets())
shallow_copy_labels->add_subset(m_subset_stack->get_last_subset()->get_subset_idx());

return shallow_copy_labels;
}
5 changes: 5 additions & 0 deletions src/shogun/labels/RegressionLabels.h
@@ -65,6 +65,11 @@ class CRegressionLabels : public CDenseLabels

/** @return object name */
virtual const char* get_name() const { return "RegressionLabels"; }

#ifndef SWIG // SWIG should skip this part
virtual CLabels* shallow_subset_copy();
#endif

};
}
#endif
46 changes: 34 additions & 12 deletions src/shogun/machine/BaggingMachine.cpp
@@ -76,9 +76,8 @@ SGVector<float64_t> CBaggingMachine::apply_get_outputs(CFeatures* data)
SGMatrix<float64_t> output(data->get_num_vectors(), m_num_bags);
output.zero();

/*
#pragma omp parallel for num_threads(parallel->get_num_threads())
*/

#pragma omp parallel for
for (int32_t i = 0; i < m_num_bags; ++i)
{
CMachine* m = dynamic_cast<CMachine*>(m_bags->get_element(i));
@@ -131,16 +130,30 @@ bool CBaggingMachine::train_machine(CFeatures* data)

/*
TODO: enable multi-threaded learning. This requires views support
on CFeatures
#pragma omp parallel for num_threads(parallel->get_num_threads())
*/
on CFeatures*/
#pragma omp parallel for
for (int32_t i = 0; i < m_num_bags; ++i)
{
CMachine* c=dynamic_cast<CMachine*>(m_machine->clone());
ASSERT(c != NULL);
SGVector<index_t> idx(get_bag_size());
idx.random(0, m_features->get_num_vectors()-1);
m_labels->add_subset(idx);

CFeatures* features;
CLabels* labels;

if (get_global_parallel()->get_num_threads()==1)
{
features = m_features;
labels = m_labels;
}
else
{
features = m_features->shallow_subset_copy();
labels = m_labels->shallow_subset_copy();
}

labels->add_subset(idx);
/* TODO:
if it's a binary labeling ensure that
there's always samples of both classes
@@ -157,19 +170,28 @@ bool CBaggingMachine::train_machine(CFeatures* data)
}
}
*/
m_features->add_subset(idx);
features->add_subset(idx);
set_machine_parameters(c,idx);
c->set_labels(m_labels);
c->train(m_features);
m_features->remove_subset();
m_labels->remove_subset();
c->set_labels(labels);
c->train(features);
features->remove_subset();
labels->remove_subset();

#pragma omp critical
{
// get out of bag indexes
CDynamicArray<index_t>* oob = get_oob_indices(idx);
m_oob_indices->push_back(oob);

// add trained machine to bag array
m_bags->push_back(c);
}

if (get_global_parallel()->get_num_threads()!=1)
{
SG_UNREF(features);
SG_UNREF(labels);
}

SG_UNREF(c);
}
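The pattern above in a nutshell: the loop body mutates per-object subset stacks, so once the loop runs under #pragma omp parallel for, each worker needs its own shallow copy of the features and labels (unless only one thread is configured), and only the push into the shared bag and out-of-bag containers is serialized with #pragma omp critical. Below is a self-contained illustration of that pattern in plain C++/OpenMP, using hypothetical stand-in types rather than Shogun classes.

#include <omp.h>
#include <random>
#include <vector>

struct View                          // stands in for a shallow feature/label copy
{
	const std::vector<double>* data; // shared, never written
	std::vector<int> subset;         // per-worker state, freely mutated
};

int main()
{
	const int num_bags = 8;
	std::vector<double> data(1000, 1.0); // shared training data
	std::vector<double> bag_results;     // shared output container

	#pragma omp parallel for
	for (int i = 0; i < num_bags; ++i)
	{
		// every worker gets its own view; no locking needed while "training"
		View v{&data, {}};
		std::mt19937 rng(i);
		std::uniform_int_distribution<int> pick(0, (int)data.size() - 1);
		for (int j = 0; j < 100; ++j)
			v.subset.push_back(pick(rng));

		double sum = 0;                  // stand-in for training on the bag
		for (int idx : v.subset)
			sum += (*v.data)[idx];

		#pragma omp critical
		bag_results.push_back(sum);      // only the shared push is serialized
	}
	return 0;
}
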
27 changes: 27 additions & 0 deletions tests/unit/features/DenseFeatures_unittest.cc
@@ -119,3 +119,30 @@ TEST(DenseFeaturesTest, copy_dimension_subset_with_subsets)
SG_UNREF(features);
SG_UNREF(f_reduced);
}

TEST(DenseFeaturesTest, shallow_copy_subset_data)
{
index_t dim=5;
index_t n=10;

SGMatrix<float64_t> data(dim, n);
std::iota(data.data(), data.data()+data.size(), 1);

SGVector<index_t> inds(n/2);
inds.random(0, n-1);

CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(data);
features->add_subset(inds);
CFeatures* features_copy = features->shallow_subset_copy();

SGMatrix<float64_t> orig_matrix=features->get_feature_matrix();
SGMatrix<float64_t> copy_matrix=static_cast<CDenseFeatures<float64_t>*>(features_copy)->get_feature_matrix();


for (index_t i=0; i<dim; ++i)
for (index_t j=0; j<inds.size(); ++j)
EXPECT_EQ(orig_matrix(i,j), copy_matrix(i,j));

SG_UNREF(features_copy);
SG_UNREF(features);
}
