Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bundle method solver - Example #594

Merged
merged 3 commits into from Jun 21, 2012
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
30 changes: 17 additions & 13 deletions src/shogun/structure/DualLibQPBMSOSVM.cpp
Expand Up @@ -22,14 +22,16 @@ CDualLibQPBMSOSVM::CDualLibQPBMSOSVM(
CStructuredModel* model,
CLossFunction* loss,
CStructuredLabels* labs,
CDotFeatures* features,
float64_t lambda)
CDotFeatures* features,
float64_t lambda,
CRiskFunction* risk_function)
:CLinearStructuredOutputMachine(model, loss, labs, features)
{
set_TolRel(0.001);
set_TolAbs(0.0);
set_BufSize(100);
set_BufSize(1000);
set_lambda(lambda);
m_risk_function=risk_function;
}

CDualLibQPBMSOSVM::~CDualLibQPBMSOSVM()
Expand All @@ -39,12 +41,20 @@ CDualLibQPBMSOSVM::~CDualLibQPBMSOSVM()
bool CDualLibQPBMSOSVM::train_machine(CFeatures* data)
{
// get dimension of w
uint32_t nDim=0;
//uint32_t nDim=m_risk_function->get_dim(data); //TODO: get_dim function accessible trough StructuredModel
uint32_t nDim=this->m_model->get_dim();

// Initialize the weight vector
m_w = SGVector< float64_t >(nDim);
m_w.zero();

bmrm_data_T bmrm_data;
bmrm_data.X=this->m_features;
bmrm_data.y=this->m_labels;
bmrm_data.w_dim=nDim;

// call the BMRM solver
bmrm_return_value_T result = svm_bmrm_solver(data, m_w.vector, m_TolRel, m_TolAbs, m_lambda,
m_BufSize, nDim, m_risk_function);
bmrm_return_value_T result = svm_bmrm_solver(&bmrm_data, m_w.vector, m_TolRel, m_TolAbs, m_lambda,
m_BufSize, m_risk_function);

if (result.exitflag==1)
{
Expand All @@ -53,9 +63,3 @@ bool CDualLibQPBMSOSVM::train_machine(CFeatures* data)
return false;
}
}

void CDualLibQPBMSOSVM::register_parameters()
{
SG_ADD(&m_w, "m_w", "Weight vector", MS_NOT_AVAILABLE);
}

8 changes: 1 addition & 7 deletions src/shogun/structure/DualLibQPBMSOSVM.h
Expand Up @@ -26,7 +26,7 @@ class CDualLibQPBMSOSVM : public CLinearStructuredOutputMachine
/** standard constructor
*
*/
CDualLibQPBMSOSVM(CStructuredModel* model, CLossFunction* loss, CStructuredLabels* labs, CDotFeatures* features, float64_t lambda);
CDualLibQPBMSOSVM(CStructuredModel* model, CLossFunction* loss, CStructuredLabels* labs, CDotFeatures* features, float64_t lambda, CRiskFunction* risk_function);

/** destructor */
~CDualLibQPBMSOSVM();
Expand Down Expand Up @@ -62,12 +62,6 @@ class CDualLibQPBMSOSVM : public CLinearStructuredOutputMachine
bool train_machine(CFeatures* data=NULL);

private:
/** register class parameters */
void register_parameters();

private:
/** weight vector */
SGVector< float64_t > m_w;

/** lambda */
float64_t m_lambda;
Expand Down
79 changes: 79 additions & 0 deletions src/shogun/structure/MulticlassRiskFunction.cpp
@@ -0,0 +1,79 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2012 Michal Uricar
* Copyright (C) 2012 Michal Uricar
*/

#include <shogun/features/DotFeatures.h>
#include <shogun/structure/MulticlassRiskFunction.h>
#include <shogun/structure/libbmrm.h>
#include <shogun/structure/MulticlassSOLabels.h>
#include <shogun/structure/MulticlassModel.h>

using namespace shogun;

CMulticlassRiskFunction::CMulticlassRiskFunction()
:CRiskFunction()
{
}

CMulticlassRiskFunction::~CMulticlassRiskFunction()
{
}

void CMulticlassRiskFunction::risk(void* data, float64_t* R, float64_t* subgrad, float64_t* W)
{
bmrm_data_T* data_struct=(bmrm_data_T*)data;
CDotFeatures* X=(CDotFeatures*)data_struct->X;
CMulticlassSOLabels* y=(CMulticlassSOLabels*)data_struct->y;

uint32_t N=X->get_num_vectors();
uint32_t num_classes=y->get_num_classes();
uint32_t feats_dim=X->get_dim_feature_space();
uint32_t w_dim=(uint32_t)data_struct->w_dim;

*R=0;
SGVector< float64_t > subgradient(w_dim);
subgradient.zero();

SGVector< float64_t > xi;

float64_t Rtmp=0.0;
float64_t Rmax=0.0;
float64_t loss=0.0;
uint32_t yhat=0;
uint32_t GT=0;

/* loop through examples */
for(uint32_t i = 0; i < N; ++i)
{
Rmax=-CMath::INFTY;
xi=X->get_computed_dot_feature_vector(i);
GT=(uint32_t)((CRealNumber*)y->get_label(i))->value;

for (uint32_t c = 0; c < num_classes; ++c)
{
loss=(c == GT) ? 0.0 : 1.0;
Rtmp = loss + SGVector< float64_t >::dot(W+c*feats_dim, xi.vector, feats_dim) - SGVector< float64_t >::dot(W+GT*feats_dim, xi.vector, feats_dim);

if (Rtmp > Rmax)
{
Rmax=Rtmp;
yhat=c;
}
}
*R += Rmax;

for(uint32_t j = 0; j < feats_dim; ++j)
{
subgradient[yhat*feats_dim+j]+=xi[j];
subgradient[GT*feats_dim+j]-=xi[j];
}

}
memcpy(subgrad, subgradient.vector, w_dim*sizeof(float64_t));
}
39 changes: 39 additions & 0 deletions src/shogun/structure/MulticlassRiskFunction.h
@@ -0,0 +1,39 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2012 Michal Uricar
* Copyright (C) 2012 Michal Uricar
*/

#ifndef _MULTICLASSRISK_FUNCTION__H__
#define _MULTICLASSRISK_FUNCITON__H__

#include <shogun/structure/RiskFunction.h>

namespace shogun
{

/** @brief Class CMulticlassRiskFunction
*
*/
class CMulticlassRiskFunction : public CRiskFunction
{
public:
/** default constructor */
CMulticlassRiskFunction();

/** destructor */
~CMulticlassRiskFunction();

virtual void risk(void* data, float64_t* R, float64_t* subgrad, float64_t* W);

virtual const char* get_name() const { return "MulticlassRiskFunction"; }

}; /* CMulticlassRiskFunction */

}

#endif /* _MULTICLASSTISK_FUNCTION__H__ */
9 changes: 0 additions & 9 deletions src/shogun/structure/RiskFunction.h
Expand Up @@ -12,9 +12,6 @@
#define _RISK_FUNCTION__H__

#include <shogun/base/SGObject.h>
//#include <shogun/features/Features.h>
//#include <shogun/labels/StructuredLabels.h>
//#include <shogun/lib/SGVector.h>

namespace shogun
{
Expand All @@ -34,14 +31,8 @@ class CRiskFunction : public CSGObject
/** computes the value of the risk function and sub-gradient at given point
*
*/
//virtual void risk(void* data, float64_t* R, SGVector< float64_t > subgrad, SGVector< float64_t > w) = 0;
virtual void risk(void* data, float64_t* R, float64_t* subgrad, float64_t* W) = 0;

/** get the dimension of joint feature vector w
*
*/
//virtual uint32_t get_dim(void* data) = 0; //TODO: Delete, will be accessible from StructuredModel

/** @return name of SGSerializable */
virtual const char* get_name() const { return "RiskFunction"; }

Expand Down
82 changes: 47 additions & 35 deletions src/shogun/structure/libbmrm.cpp
Expand Up @@ -17,10 +17,12 @@

#include <shogun/structure/libbmrm.h>
#include <shogun/lib/external/libqp.h>
#include <shogun/lib/Time.h>

namespace shogun
{
static const uint32_t QPSolverMaxIter = 10000000;
static const float64_t epsilon = 0.0000000001;

static float64_t *H;
static uint32_t BufSize;
Expand All @@ -34,22 +36,25 @@ static const float64_t *get_col( uint32_t i)
}

bmrm_return_value_T svm_bmrm_solver(
void *data,
float64_t *W,
float64_t TolRel,
float64_t TolAbs,
float64_t lambda,
uint32_t _BufSize,
uint32_t nDim,
CRiskFunction* risk_function)
//void (*risk_function)(void*, float64_t*, float64_t*, float64_t*))
bmrm_data_T* data,
float64_t* W,
float64_t TolRel,
float64_t TolAbs,
float64_t lambda,
uint32_t _BufSize,
CRiskFunction* risk_function)
{
bmrm_return_value_T bmrm = {0, 0, 0, 0, 0, 0};
bmrm_return_value_T bmrm = {0, 0, 0, 0, 0, 0, 0};
libqp_state_T qp_exitflag;
float64_t *b, *beta, *diag_H, sq_norm_W;
float64_t R, *subgrad, *A, QPSolverTolRel, rsum, C = 1.0;
uint32_t *I;
uint8_t S = 1;
uint32_t nDim=data->w_dim;
CTime ttime;
float64_t tstart, tstop;

tstart=ttime.cur_time_diff(false);

BufSize = _BufSize;
QPSolverTolRel = TolRel*0.5;
Expand All @@ -62,9 +67,6 @@ bmrm_return_value_T svm_bmrm_solver(
diag_H = NULL;
I = NULL;

LIBBMRM_FREE(W);
W = NULL;

H = (float64_t*)LIBBMRM_CALLOC(BufSize*BufSize, sizeof(float64_t));
if (H == NULL)
{
Expand Down Expand Up @@ -114,57 +116,57 @@ bmrm_return_value_T svm_bmrm_solver(
goto cleanup;
}

/* initial solution */
W = (float64_t*)LIBBMRM_CALLOC(nDim, sizeof(float64_t));
if (W == NULL)
{
bmrm.exitflag = -2;
goto cleanup;
}

//risk_function(data, &R, subgrad, NULL);
/* Iinitial solution */
risk_function->risk(data, &R, subgrad, W);

bmrm.nCP = 0;
bmrm.nIter = 0;
bmrm.exitflag = 0;

b[0] = -R;
LIBBMRM_MEMCPY(subgrad, A, nDim*sizeof(float64_t));
LIBBMRM_MEMCPY(A, subgrad, nDim*sizeof(float64_t));

/* Compute initial value of Fp, Fd, assuming that W is zero vector */
sq_norm_W = 0;
bmrm.Fp = R + 0.5*lambda*sq_norm_W;
bmrm.Fd = -LIBBMRM_PLUS_INF;

tstop=ttime.cur_time_diff(false);

/* Verbose output */
SG_SPRINT("%4d: tim=%.3f, Fp=%f, Fd=%f, R=%f\n",
bmrm.nIter, tstop-tstart, bmrm.Fp, bmrm.Fd, R);

/* main loop */
while (bmrm.exitflag == 0)
{
tstart=ttime.cur_time_diff(false);
bmrm.nIter++;

/* Update H */
if (bmrm.nCP > 0)
{
for (uint32_t i = 0; i < bmrm.nCP - 1; i++)
for (uint32_t i = 0; i < bmrm.nCP; i++)
{
rsum = 0.0;
for (uint32_t j = 0; j < nDim; j++)
{
rsum += A[LIBBMRM_INDEX(j, i, nDim)]*A[LIBBMRM_INDEX(j, bmrm.nCP, nDim)]/lambda;
rsum += A[LIBBMRM_INDEX(j, i, nDim)]*A[LIBBMRM_INDEX(j, bmrm.nCP, nDim)];
}
H[LIBBMRM_INDEX(i, bmrm.nCP, BufSize)] = rsum;
H[LIBBMRM_INDEX(i, bmrm.nCP, BufSize)] = rsum/lambda;
}
for (uint32_t i = 0; i < bmrm.nCP - 1; i++)
for (uint32_t i = 0; i < bmrm.nCP; i++)
{
H[LIBBMRM_INDEX(bmrm.nCP, i, BufSize)] = H[LIBBMRM_INDEX(i, bmrm.nCP, BufSize)];
}
}
H[LIBBMRM_INDEX(bmrm.nCP, bmrm.nCP, BufSize)] = 0.0;
//H[LIBBMRM_INDEX(bmrm.nCP, bmrm.nCP, BufSize)] = 0.0;

for (uint32_t i = 0; i < nDim; i++)
H[LIBBMRM_INDEX(bmrm.nCP, bmrm.nCP, BufSize)] += A[LIBBMRM_INDEX(i, bmrm.nCP, nDim)]*A[LIBBMRM_INDEX(i, bmrm.nCP, nDim)]/lambda;


diag_H[bmrm.nCP] = H[LIBBMRM_INDEX(bmrm.nCP, bmrm.nCP, BufSize)];
I[bmrm.nCP] = 1;

bmrm.nCP++;

Expand All @@ -175,20 +177,24 @@ bmrm_return_value_T svm_bmrm_solver(
bmrm.qp_exitflag = qp_exitflag.exitflag;

/* W update */
for (uint32_t i = 0; i < nDim; i++)
for (uint32_t i = 0; i < nDim; ++i)
{
rsum = 0.0;
for (uint32_t j = 0; j < bmrm.nCP; j++)
for (uint32_t j = 0; j < bmrm.nCP; ++j)
{
rsum += A[LIBBMRM_INDEX(i, j, nDim)]*beta[j]/lambda;
rsum += A[LIBBMRM_INDEX(i, j, nDim)]*beta[j];
}
W[i] = -rsum;
W[i] = -rsum/lambda;
}

/* compute number of active cutting planes */
bmrm.nzA = 0;
for (uint32_t aaa=0; aaa<bmrm.nCP; ++aaa)
bmrm.nzA += (beta[aaa] > epsilon) ? 1 : 0;

/* risk and subgradient computation */
//risk_function(data, &R, subgrad, W);
risk_function->risk(data, &R, subgrad, W);
LIBBMRM_MEMCPY(subgrad, A+bmrm.nCP*nDim, nDim*sizeof(float64_t));
LIBBMRM_MEMCPY(A+bmrm.nCP*nDim, subgrad, nDim*sizeof(float64_t));
b[bmrm.nCP] = -R;
for (uint32_t j = 0; j < nDim; j++)
b[bmrm.nCP] += subgrad[j]*W[j];
Expand All @@ -205,6 +211,12 @@ bmrm_return_value_T svm_bmrm_solver(
if (bmrm.Fp - bmrm.Fd <= TolAbs) bmrm.exitflag = 2;
if (bmrm.nCP >= BufSize) bmrm.exitflag = -1;

tstop=ttime.cur_time_diff(false);

/* Verbose output */
SG_SPRINT("%4d: tim=%.3f, Fp=%f, Fd=%f, (Fp-Fd)=%f, (Fp-Fd)/Fp=%f, R=%f, nCP=%d, nzA=%d\n",
bmrm.nIter, tstop-tstart, bmrm.Fp, bmrm.Fd, bmrm.Fp-bmrm.Fd, (bmrm.Fp-bmrm.Fd)/bmrm.Fp, R, bmrm.nCP, bmrm.nzA);

} /* end of main loop */

cleanup:
Expand Down