Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

mapped labels #4650

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
18 changes: 18 additions & 0 deletions src/shogun/base/SGObject.h
Expand Up @@ -692,6 +692,24 @@ class CSGObject
demangled_type<T>().c_str());
return nullptr;
}

/** Specializes the object to the specified type.
* Throws exception if the object cannot be specialized.
*
* @return The requested type
*/
template<class T> const T* as() const
{
auto c = dynamic_cast<const T*>(this);
if (c)
return c;

SG_SERROR(
"Object of type %s cannot be converted to type %s.\n",
this->get_name(),
demangled_type<T>().c_str());
return nullptr;
}
#ifndef SWIG
/**
* Get parameters observable
Expand Down
6 changes: 3 additions & 3 deletions src/shogun/classifier/AveragedPerceptron.cpp
Expand Up @@ -48,7 +48,7 @@ void CAveragedPerceptron::init_model(CFeatures* data)
}
ASSERT(features)

SGVector<int32_t> train_labels = binary_labels(m_labels)->get_int_labels();
SGVector<int32_t> train_labels = m_labels->as<CBinaryLabels>()->get_int_labels();
int32_t num_feat = features->get_dim_feature_space();
int32_t num_vec = features->get_num_vectors();
ASSERT(num_vec == train_labels.vlen)
Expand All @@ -68,7 +68,7 @@ void CAveragedPerceptron::iteration()
bool converged = true;

SGVector<float64_t> w = get_w();
auto labels = binary_labels(m_labels)->get_int_labels();
auto labels = m_labels->as<CBinaryLabels>()->get_int_labels();
auto iter_train_labels = labels.begin();

int32_t num_vec = features->get_num_vectors();
Expand All @@ -92,4 +92,4 @@ void CAveragedPerceptron::iteration()
num_prev_weights++;
}
m_complete = converged;
}
}
8 changes: 5 additions & 3 deletions src/shogun/classifier/LDA.cpp
Expand Up @@ -14,6 +14,7 @@
#include <shogun/preprocessor/FisherLDA.h>
#include <shogun/solver/LDACanVarSolver.h>
#include <shogun/solver/LDASolver.h>
#include <shogun/labels/MappedMulticlassLabels.h>
#include <vector>

using namespace Eigen;
Expand Down Expand Up @@ -66,7 +67,6 @@ bool CLDA::train_machine_templated(CDenseFeatures<ST>* data)
{
index_t num_feat = data->get_num_features();
index_t num_vec = data->get_num_vectors();
;

bool lda_more_efficient = (m_method == AUTO_LDA && num_vec <= num_feat);

Expand All @@ -79,7 +79,8 @@ bool CLDA::train_machine_templated(CDenseFeatures<ST>* data)
template <typename ST>
bool CLDA::solver_svd(CDenseFeatures<ST>* data)
{
auto labels = multiclass_labels(m_labels);
auto labels = some<CMappedMulticlassLabels>(m_labels);

REQUIRE(
labels->get_num_classes() == 2, "Number of classes (%d) must be 2\n",
labels->get_num_classes())
Expand Down Expand Up @@ -113,7 +114,8 @@ bool CLDA::solver_svd(CDenseFeatures<ST>* data)
template <typename ST>
bool CLDA::solver_classic(CDenseFeatures<ST>* data)
{
auto labels = multiclass_labels(m_labels);
auto labels = Some<CMulticlassLabels>::from_raw(MappedLabels<CMulticlassLabels>::wrap_if_necessary<CMappedMulticlassLabels>(m_labels));

REQUIRE(
labels->get_num_classes() == 2, "Number of classes (%d) must be 2\n",
labels->get_num_classes())
Expand Down
2 changes: 1 addition & 1 deletion src/shogun/classifier/Perceptron.cpp
Expand Up @@ -60,7 +60,7 @@ void CPerceptron::iteration()
bool converged = true;
SGVector<float64_t> w = get_w();

auto labels = binary_labels(m_labels)->get_int_labels();
auto labels = m_labels->as<CBinaryLabels>()->get_int_labels();
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Reverting the old mapping approach, which was invasive. Now algorithms simply assume they are given the right labels (as was the case before any mapping was done).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

looks good!

auto iter_train_labels = labels.begin();

for (const auto& v : DotIterator(features))
Expand Down
4 changes: 2 additions & 2 deletions src/shogun/classifier/svm/LibLinear.cpp
Expand Up @@ -139,10 +139,10 @@ bool CLibLinear::train_machine(CFeatures* data)
double Cp = get_C1();
double Cn = get_C2();

auto labels = binary_labels(m_labels);
auto labels = m_labels->as<CBinaryLabels>();
for (int32_t i = 0; i < prob.l; i++)
{
prob.y[i] = labels->get_int_label(i);
prob.y[i] = labels->get_label(i);
if (prob.y[i] == +1)
Cs[i] = get_C1();
else if (prob.y[i] == -1)
Expand Down
5 changes: 3 additions & 2 deletions src/shogun/classifier/svm/LibSVM.cpp
Expand Up @@ -50,7 +50,8 @@ bool CLibSVM::train_machine(CFeatures* data)

struct svm_node* x_space;

ASSERT(m_labels && m_labels->get_num_labels())
REQUIRE(m_labels && m_labels->get_num_labels(), "No labels provided.\n")
REQUIRE(kernel, "No kernel provided.\n")

if (data)
{
Expand Down Expand Up @@ -94,7 +95,7 @@ bool CLibSVM::train_machine(CFeatures* data)

x_space=SG_MALLOC(struct svm_node, 2*problem.l);

auto labels = binary_labels(m_labels);
auto labels = m_labels->as<CBinaryLabels>();
for (int32_t i=0; i<problem.l; i++)
{
problem.y[i] = labels->get_label(i);
Expand Down
2 changes: 1 addition & 1 deletion src/shogun/classifier/svm/MPDSVM.cpp
Expand Up @@ -30,7 +30,7 @@ CMPDSVM::~CMPDSVM()

bool CMPDSVM::train_machine(CFeatures* data)
{
auto labels = binary_labels(m_labels);
auto labels = m_labels->as<CBinaryLabels>();

ASSERT(kernel)

Expand Down
6 changes: 3 additions & 3 deletions src/shogun/classifier/svm/NewtonSVM.cpp
Expand Up @@ -57,7 +57,7 @@ void CNewtonSVM::init_model(CFeatures* data)

ASSERT(features)

SGVector<float64_t> train_labels = binary_labels(m_labels)->get_labels();
SGVector<float64_t> train_labels = m_labels->as<CBinaryLabels>()->get_labels();
int32_t num_feat=features->get_dim_feature_space();
int32_t num_vec=features->get_num_vectors();

Expand Down Expand Up @@ -144,7 +144,7 @@ void CNewtonSVM::iteration()

void CNewtonSVM::line_search_linear(const SGVector<float64_t> d)
{
SGVector<float64_t> Y = binary_labels(m_labels)->get_labels();
SGVector<float64_t> Y = m_labels->as<CBinaryLabels>()->get_labels();
SGVector<float64_t> outz(x_n);
SGVector<float64_t> temp1(x_n);
SGVector<float64_t> temp1forout(x_n);
Expand Down Expand Up @@ -215,7 +215,7 @@ void CNewtonSVM::line_search_linear(const SGVector<float64_t> d)
void CNewtonSVM::obj_fun_linear()
{
SGVector<float64_t> weights = get_w();
SGVector<float64_t> v = binary_labels(m_labels)->get_labels();
SGVector<float64_t> v = m_labels->as<CBinaryLabels>()->get_labels();

for (int32_t i=0; i<x_n; i++)
{
Expand Down
2 changes: 1 addition & 1 deletion src/shogun/classifier/svm/SVMLight.cpp
Expand Up @@ -310,7 +310,7 @@ void CSVMLight::svm_learn()
int32_t trainpos=0, trainneg=0 ;
ASSERT(m_labels)

SGVector<int32_t> lab = binary_labels(m_labels)->get_int_labels();
SGVector<int32_t> lab = m_labels->as<CBinaryLabels>()->get_int_labels();
int32_t totdoc=lab.vlen;
ASSERT(lab.vector && lab.vlen)
int32_t* label=SGVector<int32_t>::clone_vector(lab.vector, lab.vlen);
Expand Down
4 changes: 2 additions & 2 deletions src/shogun/evaluation/ClusteringEvaluation.cpp
Expand Up @@ -39,8 +39,8 @@ void CClusteringEvaluation::best_map(CLabels* predicted, CLabels* ground_truth)
ASSERT(predicted->get_label_type() == LT_MULTICLASS)
ASSERT(ground_truth->get_label_type() == LT_MULTICLASS)

SGVector<float64_t> label_p=((CMulticlassLabels*) predicted)->get_unique_labels();
SGVector<float64_t> label_g=((CMulticlassLabels*) ground_truth)->get_unique_labels();
SGVector<float64_t> label_p=((CMulticlassLabels*) predicted)->get_labels().unique();
SGVector<float64_t> label_g=((CMulticlassLabels*) ground_truth)->get_labels().unique();

SGVector<int32_t> predicted_ilabels=((CMulticlassLabels*) predicted)->get_int_labels();
SGVector<int32_t> groundtruth_ilabels=((CMulticlassLabels*) ground_truth)->get_int_labels();
Expand Down
4 changes: 2 additions & 2 deletions src/shogun/evaluation/ClusteringMutualInformation.cpp
Expand Up @@ -15,8 +15,8 @@ float64_t CClusteringMutualInformation::evaluate(CLabels* predicted, CLabels* gr
ASSERT(predicted && ground_truth)
ASSERT(predicted->get_label_type() == LT_MULTICLASS)
ASSERT(ground_truth->get_label_type() == LT_MULTICLASS)
SGVector<float64_t> label_p=((CMulticlassLabels*) predicted)->get_unique_labels();
SGVector<float64_t> label_g=((CMulticlassLabels*) ground_truth)->get_unique_labels();
SGVector<float64_t> label_p=((CMulticlassLabels*) predicted)->get_labels().unique();
SGVector<float64_t> label_g=((CMulticlassLabels*) ground_truth)->get_labels().unique();

if (label_p.vlen != label_g.vlen)
SG_ERROR("Number of classes are different\n")
Expand Down
6 changes: 3 additions & 3 deletions src/shogun/evaluation/ContingencyTableEvaluation.cpp
Expand Up @@ -19,11 +19,11 @@ float64_t CContingencyTableEvaluation::evaluate(CLabels* predicted, CLabels* gro
get_name(), predicted->get_num_labels(),
ground_truth->get_num_labels());

auto predicted_binary = binary_labels(predicted);
auto ground_truth_binary = binary_labels(ground_truth);
auto predicted_binary = predicted->as<CBinaryLabels>();
auto ground_truth_binary = ground_truth->as<CBinaryLabels>();

ground_truth->ensure_valid();
compute_scores(predicted_binary.get(), ground_truth_binary.get());
compute_scores(predicted_binary, ground_truth_binary);
switch (m_type)
{
case ACCURACY:
Expand Down
4 changes: 2 additions & 2 deletions src/shogun/evaluation/MulticlassAccuracy.cpp
Expand Up @@ -16,8 +16,8 @@ float64_t CMulticlassAccuracy::evaluate(CLabels* predicted, CLabels* ground_trut
ASSERT(predicted && ground_truth)
ASSERT(predicted->get_num_labels() == ground_truth->get_num_labels())
int32_t length = predicted->get_num_labels();
auto predicted_mc = multiclass_labels(predicted);
auto ground_truth_mc = multiclass_labels(ground_truth);
auto predicted_mc = predicted->as<CMulticlassLabels>();
auto ground_truth_mc = ground_truth->as<CMulticlassLabels>();
int32_t correct = 0;
if (m_ignore_rejects)
{
Expand Down
38 changes: 6 additions & 32 deletions src/shogun/labels/BinaryLabels.cpp
Expand Up @@ -8,6 +8,7 @@

#include <shogun/base/range.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/labels/DenseLabels.h>
#include <shogun/lib/SGVector.h>
#include <shogun/mathematics/Statistics.h>
Expand Down Expand Up @@ -63,14 +64,13 @@ CBinaryLabels::CBinaryLabels(CFile * loader) : CDenseLabels(loader)

bool CBinaryLabels::is_valid() const
{
if (!CDenseLabels::is_valid())
return false;
// if (!CDenseLabels::is_valid())
// return false;

int32_t subset_size = get_num_labels();
for (int32_t i = 0; i < subset_size; i++)
for (auto i : range(get_num_labels()))
{
int32_t real_i = m_subset_stack->subset_idx_conversion(i);
if (m_labels[real_i] != +1.0 && m_labels[real_i] != -1.0)
auto val = get_label(i);
if (val != +1.0 && val != -1.0)
return false;
}
return true;
Expand Down Expand Up @@ -138,29 +138,3 @@ CLabels* CBinaryLabels::duplicate() const
{
return new CBinaryLabels(*this);
}

namespace shogun
{
Some<CBinaryLabels> binary_labels(CLabels* orig)
{
REQUIRE(orig, "No labels provided.\n");
try
{
switch (orig->get_label_type())
{
case LT_BINARY:
return Some<CBinaryLabels>::from_raw((CBinaryLabels*)orig);
default:
SG_SNOTIMPLEMENTED
}
}
catch (const ShogunException& e)
{
SG_SERROR(
"Cannot convert %s to binary labels: %s\n", orig->get_name(),
e.what());
}

return Some<CBinaryLabels>::from_raw(nullptr);
}
} // namespace shogun
4 changes: 0 additions & 4 deletions src/shogun/labels/BinaryLabels.h
Expand Up @@ -13,7 +13,6 @@

#include <shogun/labels/DenseLabels.h>
#include <shogun/labels/LabelTypes.h>
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/lib/common.h>

namespace shogun
Expand Down Expand Up @@ -129,8 +128,5 @@ class CBinaryLabels : public CDenseLabels
#endif
};

#ifndef SWIG
Some<CBinaryLabels> binary_labels(CLabels* orig);
#endif // SWIG
}
#endif
9 changes: 5 additions & 4 deletions src/shogun/labels/DenseLabels.cpp
Expand Up @@ -197,11 +197,12 @@ float64_t CDenseLabels::get_label(int32_t idx) const
/** Returns the label at @p idx as an integer.
 *
 * @param idx index into the (possibly subset-restricted) label vector
 * @return label value converted to int32_t
 *
 * Errors out if the stored value is not an exact integer.
 */
int32_t CDenseLabels::get_int_label(int32_t idx)
{
	ASSERT(idx<get_num_labels())
	// get_label() already maps idx through the subset stack, so pass the
	// caller-visible index directly (converting twice would read the wrong
	// element when a subset is active).
	auto label = get_label(idx);
	// Reject non-integral values; pass both arguments the format expects.
	if (label != float64_t((int32_t(label))))
		SG_ERROR("label[%d]=%g is not an integer\n", idx, label)

	return int32_t(label);
}

int32_t CDenseLabels::get_num_labels() const
Expand Down
4 changes: 2 additions & 2 deletions src/shogun/labels/DenseLabels.h
Expand Up @@ -108,7 +108,7 @@ namespace shogun
*
* @return labels, a copy if a subset is set
*/
SGVector<float64_t> get_labels() const;
virtual SGVector<float64_t> get_labels() const;

/** get label
*
Expand All @@ -117,7 +117,7 @@ namespace shogun
* @param idx index of label to get
* @return value of label
*/
float64_t get_label(int32_t idx) const;
virtual float64_t get_label(int32_t idx) const;

/** get label
*
Expand Down
43 changes: 43 additions & 0 deletions src/shogun/labels/MappedBinaryLabels.cpp
@@ -0,0 +1,43 @@
/*
* This software is distributed under BSD 3-clause license (see LICENSE file).
*
* Authors: Heiko Strathmann
*/

#include <shogun/labels/MappedBinaryLabels.h>
#include <shogun/labels/MulticlassLabels.h>


using namespace shogun;

/** Builds the bidirectional value mapping used to present @p orig as binary
 * labels with internal values {-1, +1}.
 *
 * - LT_BINARY: already in the internal representation, identity (empty) maps.
 * - LT_MULTICLASS / LT_REGRESSION: requires at most two distinct label values;
 *   the first unique value maps to -1, the second (if present) to +1.
 * - anything else: error.
 *
 * @param orig labels to derive the mapping from (not modified)
 * @return pair of (to_internal, from_internal) maps
 */
std::pair<LabelMap, LabelMap> CMappedBinaryLabels::create_mapping(const CLabels* orig) const
{
	LabelMap to_internal;
	LabelMap from_internal;

	switch (orig->get_label_type())
	{
	case LT_BINARY:
		// Already binary: empty maps mean identity mapping.
		break;
	case LT_MULTICLASS:
	case LT_REGRESSION:
	{
		auto dense = static_cast<const CDenseLabels*>(orig);
		auto unique = dense->get_labels().unique();
		REQUIRE(unique.size()<=2, "Cannot use %d label values as binary labels.\n", unique.size());
		to_internal[unique[0]] = -1;
		from_internal[-1] = unique[0];

		if (unique.size()==2)
		{
			to_internal[unique[1]] = 1;
			from_internal[+1] = unique[1];
		}
		break;
	}
	default:
		// Report source type vs. this (target) type; the original passed
		// orig->get_name() for both placeholders.
		SG_ERROR("Cannot use %s as %s.\n", orig->get_name(), this->get_name());
	}

	return std::make_pair(to_internal, from_internal);
}