From d04bb506f95b24424ffb6fc90ff31bceec2446e1 Mon Sep 17 00:00:00 2001 From: Sergey Lisitsyn Date: Mon, 25 Feb 2013 15:03:14 +0400 Subject: [PATCH] Added convenience constructor for BinaryLabels --- src/shogun/labels/BinaryLabels.cpp | 497 +++++++++++++++-------------- src/shogun/labels/BinaryLabels.h | 201 ++++++------ 2 files changed, 357 insertions(+), 341 deletions(-) diff --git a/src/shogun/labels/BinaryLabels.cpp b/src/shogun/labels/BinaryLabels.cpp index e612e7d599a..3ef8d9c8ecc 100644 --- a/src/shogun/labels/BinaryLabels.cpp +++ b/src/shogun/labels/BinaryLabels.cpp @@ -1,244 +1,253 @@ -#include -#include - -using namespace shogun; - -CBinaryLabels::CBinaryLabels() : CDenseLabels() -{ -} - -CBinaryLabels::CBinaryLabels(int32_t num_labels) : CDenseLabels(num_labels) -{ -} - -CBinaryLabels::CBinaryLabels(SGVector src, float64_t threshold) : CDenseLabels() -{ - SGVector labels(src.vlen); - for (int32_t i=0; i=0 ? +1.0 : -1.0; - set_labels(labels); - set_values(src); -} - -CBinaryLabels::CBinaryLabels(CFile* loader) : CDenseLabels(loader) -{ -} - -CBinaryLabels* CBinaryLabels::obtain_from_generic(CLabels* base_labels) -{ - if ( base_labels->get_label_type() == LT_BINARY ) - return (CBinaryLabels*) base_labels; - else - SG_SERROR("base_labels must be of dynamic type CBinaryLabels") - - return NULL; -} - - -void CBinaryLabels::ensure_valid(const char* context) -{ - CDenseLabels::ensure_valid(context); - bool found_plus_one=false; - bool found_minus_one=false; - - int32_t subset_size=get_num_labels(); - for (int32_t i=0; isubset_idx_conversion(i); - if (m_labels[real_i]==+1.0) - found_plus_one=true; - else if (m_labels[real_i]==-1.0) - found_minus_one=true; - else - { - SG_ERROR("%s%sNot a two class labeling label[%d]=%f (only +1/-1 " - "allowed)\n", context?context:"", context?": ":"", i, m_labels[real_i]); - } - } - - if (!found_plus_one) - { - SG_ERROR("%s%sNot a two class labeling - no positively labeled examples found\n", - context?context:"", context?": ":""); - } - - if (!found_minus_one) - { - SG_ERROR("%s%sNot a two class labeling - no negatively labeled examples found\n", - context?context:"", context?": ":""); - } -} - -ELabelType CBinaryLabels::get_label_type() -{ - return LT_BINARY; -} - -void CBinaryLabels::scores_to_probabilities() -{ - SG_DEBUG("entering CBinaryLabels::scores_to_probabilities()\n") - - REQUIRE(m_current_values.vector, "%s::scores_to_probabilities() requires " - "values vector!\n", get_name()); - - /* count prior0 and prior1 if needed */ - int32_t prior0=0; - int32_t prior1=0; - SG_DEBUG("counting number of positive and negative labels\n") - { - for (index_t i=0; i0) - prior1++; - else - prior0++; - } - } - SG_DEBUG("%d pos; %d neg\n", prior1, prior0) - - /* parameter setting */ - /* maximum number of iterations */ - index_t maxiter=100; - - /* minimum step taken in line search */ - float64_t minstep=1E-10; - - /* for numerically strict pd of hessian */ - float64_t sigma=1E-12; - float64_t eps=1E-5; - - /* construct target support */ - float64_t hiTarget=(prior1+1.0)/(prior1+2.0); - float64_t loTarget=1/(prior0+2.0); - index_t length=prior1+prior0; - - SGVector t(length); - for (index_t i=0; i0) - t[i]=hiTarget; - else - t[i]=loTarget; - } - - /* initial Point and Initial Fun Value */ - /* result parameters of sigmoid */ - float64_t a=0; - float64_t b=CMath::log((prior0+1.0)/(prior1+1.0)); - float64_t fval=0.0; - - for (index_t i=0; i=0) - fval+=t[i]*fApB+CMath::log(1+CMath::exp(-fApB)); - else - fval+=(t[i]-1)*fApB+CMath::log(1+CMath::exp(fApB)); - } - - index_t it; - float64_t g1; - float64_t g2; - for (it=0; it=0) - { - p=CMath::exp(-fApB)/(1.0+CMath::exp(-fApB)); - q=1.0/(1.0+CMath::exp(-fApB)); - } - else - { - p=1.0/(1.0+CMath::exp(fApB)); - q=CMath::exp(fApB)/(1.0+CMath::exp(fApB)); - } - - float64_t d2=p*q; - h11+=m_current_values[i]*m_current_values[i]*d2; - h22+=d2; - h21+=m_current_values[i]*d2; - float64_t d1=t[i]-p; - g1+=m_current_values[i]*d1; - g2+=d1; - } - - /* Stopping Criteria */ - if (CMath::abs(g1)=minstep) - { - float64_t newA=a+stepsize*dA; - float64_t newB=b+stepsize*dB; - - /* New function value */ - float64_t newf=0.0; - for (index_t i=0; i=0) - newf+=t[i]*fApB+CMath::log(1+CMath::exp(-fApB)); - else - newf+=(t[i]-1)*fApB+CMath::log(1+CMath::exp(fApB)); - } - - /* Check sufficient decrease */ - if (newf=maxiter-1) - { - SG_WARNING("%s::scores_to_probabilities(): reaching maximal iterations," - " g1=%f, g2=%f\n", get_name(), g1, g2); - } - - SG_DEBUG("fitted sigmoid: a=%f, b=%f\n", a, b) - - /* now the sigmoid is fitted, convert all values to probabilities */ - for (index_t i=0; i=0 ? CMath::exp(-fApB)/(1.0+exp(-fApB)) : - 1.0/(1+CMath::exp(fApB)); - } - - SG_DEBUG("leaving CBinaryLabels::scores_to_probabilities()\n") -} +#include +#include + +using namespace shogun; + +CBinaryLabels::CBinaryLabels() : CDenseLabels() +{ +} + +CBinaryLabels::CBinaryLabels(int32_t num_labels) : CDenseLabels(num_labels) +{ +} + +CBinaryLabels::CBinaryLabels(SGVector src) : CDenseLabels() +{ + SGVector values(src.vlen); + for (int32_t i=0; i src, float64_t threshold) : CDenseLabels() +{ + SGVector labels(src.vlen); + for (int32_t i=0; i=0 ? +1.0 : -1.0; + set_labels(labels); + set_values(src); +} + +CBinaryLabels::CBinaryLabels(CFile* loader) : CDenseLabels(loader) +{ +} + +CBinaryLabels* CBinaryLabels::obtain_from_generic(CLabels* base_labels) +{ + if ( base_labels->get_label_type() == LT_BINARY ) + return (CBinaryLabels*) base_labels; + else + SG_SERROR("base_labels must be of dynamic type CBinaryLabels") + + return NULL; +} + + +void CBinaryLabels::ensure_valid(const char* context) +{ + CDenseLabels::ensure_valid(context); + bool found_plus_one=false; + bool found_minus_one=false; + + int32_t subset_size=get_num_labels(); + for (int32_t i=0; isubset_idx_conversion(i); + if (m_labels[real_i]==+1.0) + found_plus_one=true; + else if (m_labels[real_i]==-1.0) + found_minus_one=true; + else + { + SG_ERROR("%s%sNot a two class labeling label[%d]=%f (only +1/-1 " + "allowed)\n", context?context:"", context?": ":"", i, m_labels[real_i]); + } + } + + if (!found_plus_one) + { + SG_ERROR("%s%sNot a two class labeling - no positively labeled examples found\n", + context?context:"", context?": ":""); + } + + if (!found_minus_one) + { + SG_ERROR("%s%sNot a two class labeling - no negatively labeled examples found\n", + context?context:"", context?": ":""); + } +} + +ELabelType CBinaryLabels::get_label_type() +{ + return LT_BINARY; +} + +void CBinaryLabels::scores_to_probabilities() +{ + SG_DEBUG("entering CBinaryLabels::scores_to_probabilities()\n") + + REQUIRE(m_current_values.vector, "%s::scores_to_probabilities() requires " + "values vector!\n", get_name()); + + /* count prior0 and prior1 if needed */ + int32_t prior0=0; + int32_t prior1=0; + SG_DEBUG("counting number of positive and negative labels\n") + { + for (index_t i=0; i0) + prior1++; + else + prior0++; + } + } + SG_DEBUG("%d pos; %d neg\n", prior1, prior0) + + /* parameter setting */ + /* maximum number of iterations */ + index_t maxiter=100; + + /* minimum step taken in line search */ + float64_t minstep=1E-10; + + /* for numerically strict pd of hessian */ + float64_t sigma=1E-12; + float64_t eps=1E-5; + + /* construct target support */ + float64_t hiTarget=(prior1+1.0)/(prior1+2.0); + float64_t loTarget=1/(prior0+2.0); + index_t length=prior1+prior0; + + SGVector t(length); + for (index_t i=0; i0) + t[i]=hiTarget; + else + t[i]=loTarget; + } + + /* initial Point and Initial Fun Value */ + /* result parameters of sigmoid */ + float64_t a=0; + float64_t b=CMath::log((prior0+1.0)/(prior1+1.0)); + float64_t fval=0.0; + + for (index_t i=0; i=0) + fval+=t[i]*fApB+CMath::log(1+CMath::exp(-fApB)); + else + fval+=(t[i]-1)*fApB+CMath::log(1+CMath::exp(fApB)); + } + + index_t it; + float64_t g1; + float64_t g2; + for (it=0; it=0) + { + p=CMath::exp(-fApB)/(1.0+CMath::exp(-fApB)); + q=1.0/(1.0+CMath::exp(-fApB)); + } + else + { + p=1.0/(1.0+CMath::exp(fApB)); + q=CMath::exp(fApB)/(1.0+CMath::exp(fApB)); + } + + float64_t d2=p*q; + h11+=m_current_values[i]*m_current_values[i]*d2; + h22+=d2; + h21+=m_current_values[i]*d2; + float64_t d1=t[i]-p; + g1+=m_current_values[i]*d1; + g2+=d1; + } + + /* Stopping Criteria */ + if (CMath::abs(g1)=minstep) + { + float64_t newA=a+stepsize*dA; + float64_t newB=b+stepsize*dB; + + /* New function value */ + float64_t newf=0.0; + for (index_t i=0; i=0) + newf+=t[i]*fApB+CMath::log(1+CMath::exp(-fApB)); + else + newf+=(t[i]-1)*fApB+CMath::log(1+CMath::exp(fApB)); + } + + /* Check sufficient decrease */ + if (newf=maxiter-1) + { + SG_WARNING("%s::scores_to_probabilities(): reaching maximal iterations," + " g1=%f, g2=%f\n", get_name(), g1, g2); + } + + SG_DEBUG("fitted sigmoid: a=%f, b=%f\n", a, b) + + /* now the sigmoid is fitted, convert all values to probabilities */ + for (index_t i=0; i=0 ? CMath::exp(-fApB)/(1.0+exp(-fApB)) : + 1.0/(1+CMath::exp(fApB)); + } + + SG_DEBUG("leaving CBinaryLabels::scores_to_probabilities()\n") +} diff --git a/src/shogun/labels/BinaryLabels.h b/src/shogun/labels/BinaryLabels.h index cb9f95c80f4..6bf78134d3a 100644 --- a/src/shogun/labels/BinaryLabels.h +++ b/src/shogun/labels/BinaryLabels.h @@ -1,97 +1,104 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * Written (W) 1999-2009 Soeren Sonnenburg - * Written (W) 1999-2008 Gunnar Raetsch - * Written (W) 2011-2012 Heiko Strathmann - * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society - */ - -#ifndef _BINARY_LABELS__H__ -#define _BINARY_LABELS__H__ - -#include -#include -#include -#include - -namespace shogun -{ - class CFile; - class CDenseLabels; - -/** @brief Binary Labels for binary classification - * - * valid values for labels are +1/-1 - * - * Scores may be converted into calibrated probabilities using - * scores_to_probabilities(), which implements the method described in - * Lin, H., Lin, C., and Weng, R. (2007). - * A note on Platt's probabilistic outputs for support vector machines. - * Should only be used in conjunction with SVM. - */ -class CBinaryLabels : public CDenseLabels -{ - public: - /** default constructor */ - CBinaryLabels(); - - /** constructor - * - * @param num_labels number of labels - */ - CBinaryLabels(int32_t num_labels); - - /** constructor - * sets values from src vector - * sets labels with sign of src elements with added threshold - * - * @param src labels to set - * @param threshold threshold - */ - CBinaryLabels(SGVector src, float64_t threshold=0.0); - - /** constructor - * - * @param loader File object via which to load data - */ - CBinaryLabels(CFile* loader); - - /** helper method used to specialize a base class instance - * - * @param base_labels its dynamic type must be CBinaryLabels - */ - static CBinaryLabels* obtain_from_generic(CLabels* base_labels); - - /** Make sure the label is valid, otherwise raise SG_ERROR. - * - * possible with subset - * - * @param context optional message to convey the context - */ - virtual void ensure_valid(const char* context=NULL); - - /** get label type - * - * @return label type binary - */ - virtual ELabelType get_label_type(); - - /** Converts all scores to calibrated probabilities by fitting a - * sigmoid function using the method described in - * Lin, H., Lin, C., and Weng, R. (2007). - * A note on Platt's probabilistic outputs for support vector machines. - * - * Should only be used in conjunction with SVM. - * The fitted sigmoid is used to replace all score values. - */ - void scores_to_probabilities(); - - /** @return object name */ - virtual const char* get_name() const { return "BinaryLabels"; } -}; -} -#endif +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * Written (W) 1999-2009 Soeren Sonnenburg + * Written (W) 1999-2008 Gunnar Raetsch + * Written (W) 2011-2012 Heiko Strathmann + * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society + */ + +#ifndef _BINARY_LABELS__H__ +#define _BINARY_LABELS__H__ + +#include +#include +#include +#include + +namespace shogun +{ + class CFile; + class CDenseLabels; + +/** @brief Binary Labels for binary classification + * + * valid values for labels are +1/-1 + * + * Scores may be converted into calibrated probabilities using + * scores_to_probabilities(), which implements the method described in + * Lin, H., Lin, C., and Weng, R. (2007). + * A note on Platt's probabilistic outputs for support vector machines. + * Should only be used in conjunction with SVM. + */ +class CBinaryLabels : public CDenseLabels +{ + public: + /** default constructor */ + CBinaryLabels(); + + /** constructor + * + * @param num_labels number of labels + */ + CBinaryLabels(int32_t num_labels); + + /** constructor + * sets labels with src elements + * + * @param src labels to set + */ + CBinaryLabels(SGVector src); + + /** constructor + * sets values from src vector + * sets labels with sign of src elements with added threshold + * + * @param src labels to set + * @param threshold threshold + */ + CBinaryLabels(SGVector src, float64_t threshold=0.0); + + /** constructor + * + * @param loader File object via which to load data + */ + CBinaryLabels(CFile* loader); + + /** helper method used to specialize a base class instance + * + * @param base_labels its dynamic type must be CBinaryLabels + */ + static CBinaryLabels* obtain_from_generic(CLabels* base_labels); + + /** Make sure the label is valid, otherwise raise SG_ERROR. + * + * possible with subset + * + * @param context optional message to convey the context + */ + virtual void ensure_valid(const char* context=NULL); + + /** get label type + * + * @return label type binary + */ + virtual ELabelType get_label_type(); + + /** Converts all scores to calibrated probabilities by fitting a + * sigmoid function using the method described in + * Lin, H., Lin, C., and Weng, R. (2007). + * A note on Platt's probabilistic outputs for support vector machines. + * + * Should only be used in conjunction with SVM. + * The fitted sigmoid is used to replace all score values. + */ + void scores_to_probabilities(); + + /** @return object name */ + virtual const char* get_name() const { return "BinaryLabels"; } +}; +} +#endif