/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2011-2012 Christian Widmer
* Written (W) 2007-2010 Soeren Sonnenburg
* Copyright (c) 2007-2009 The LIBLINEAR Project.
* Copyright (C) 2007-2012 Fraunhofer Institute FIRST and Max-Planck-Society
*/
#include <vector>
#include <shogun/lib/config.h>
#ifdef HAVE_LAPACK
#include <shogun/base/Parameter.h>
#include <shogun/base/progress.h>
#include <shogun/features/DotFeatures.h>
#include <shogun/io/SGIO.h>
#include <shogun/lib/Signal.h>
#include <shogun/lib/Time.h>
#include <shogun/optimization/liblinear/tron.h>
#include <shogun/transfer/multitask/LibLinearMTL.h>
using namespace shogun;
CLibLinearMTL::CLibLinearMTL()
: CLinearMachine()
{
init();
}
CLibLinearMTL::CLibLinearMTL(
float64_t C, CDotFeatures* traindat, CLabels* trainlab)
: CLinearMachine()
{
init();
C1=C;
C2=C;
use_bias=true;
set_features(traindat);
set_labels(trainlab);
}
void CLibLinearMTL::init()
{
use_bias=false;
C1=1;
C2=1;
set_max_iterations();
epsilon=1e-5;
SG_ADD(&C1, "C1", "C Cost constant 1.", MS_AVAILABLE);
SG_ADD(&C2, "C2", "C Cost constant 2.", MS_AVAILABLE);
SG_ADD(&use_bias, "use_bias", "Indicates if bias is used.",
MS_NOT_AVAILABLE);
SG_ADD(&epsilon, "epsilon", "Convergence precision.", MS_NOT_AVAILABLE);
SG_ADD(&max_iterations, "max_iterations", "Max number of iterations.",
MS_NOT_AVAILABLE);
}
CLibLinearMTL::~CLibLinearMTL()
{
}
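/* A minimal usage sketch (illustrative only: the data setup and the
 * setter names are assumptions based on the members used below and are
 * not verified against LibLinearMTL.h):
 *
 *   CDenseFeatures<float64_t>* feats = new CDenseFeatures<float64_t>(data);
 *   CBinaryLabels* labels = new CBinaryLabels(lab);
 *   CLibLinearMTL* mtl = new CLibLinearMTL(1.0, feats, labels);
 *   mtl->set_num_tasks(num_tasks);            // number of tasks
 *   mtl->set_task_indicator_lhs(task_ids);    // task id of each training example
 *   mtl->set_task_similarity_matrix(M);       // sparse task similarity matrix
 *   mtl->set_graph_laplacian(L);              // used by compute_primal_obj()
 *   mtl->train();
 */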
bool CLibLinearMTL::train_machine(CFeatures* data)
{
ASSERT(m_labels)
if (data)
{
if (!data->has_property(FP_DOT))
SG_ERROR("Specified features are not of type CDotFeatures\n")
set_features((CDotFeatures*) data);
}
ASSERT(features)
m_labels->ensure_valid();
int32_t num_train_labels=m_labels->get_num_labels();
int32_t num_feat=features->get_dim_feature_space();
int32_t num_vec=features->get_num_vectors();
if (num_vec!=num_train_labels)
{
SG_ERROR("number of vectors %d does not match "
"number of training labels %d\n",
num_vec, num_train_labels);
}
// weight vector has one extra entry when a bias term is used
const int32_t w_dim = use_bias ? num_feat+1 : num_feat;
float64_t* training_w = SG_MALLOC(float64_t, w_dim);
memset(training_w, 0, sizeof(float64_t)*w_dim);
liblinear_problem prob;
prob.n=w_dim;
prob.l=num_vec;
prob.x=features;
prob.y=SG_MALLOC(float64_t, prob.l);
prob.use_bias=use_bias;
for (int32_t i=0; i<prob.l; i++)
prob.y[i]=((CBinaryLabels*)m_labels)->get_label(i);
int pos = 0;
int neg = 0;
for(int i=0;i<prob.l;i++)
{
if(prob.y[i]==+1)
pos++;
}
neg = prob.l - pos;
SG_INFO("%d training points %d dims\n", prob.l, prob.n)
SG_INFO("%d positives, %d negatives\n", pos, neg)
double Cp=C1;
double Cn=C2;
solve_l2r_l1l2_svc(&prob, epsilon, Cp, Cn);
// note: solve_l2r_l1l2_svc() accumulates the per-task weight vectors
// in the member matrix V; training_w itself is never written by it
if (use_bias)
set_bias(training_w[num_feat]);
else
set_bias(0);
SG_FREE(prob.y);
SGVector<float64_t> w(num_feat);
for (int32_t i=0; i<num_feat; i++)
w[i] = training_w[i];
set_w(w);
SG_FREE(training_w);
return true;
}
// A coordinate descent algorithm for
// L1-loss and L2-loss SVM dual problems
//
// min_\alpha 0.5(\alpha^T (Q + D)\alpha) - e^T \alpha,
// s.t. 0 <= alpha_i <= upper_bound_i,
//
// where Qij = yi yj xi^T xj and
// D is a diagonal matrix
//
// In L1-SVM case:
// upper_bound_i = Cp if y_i = 1
// upper_bound_i = Cn if y_i = -1
// D_ii = 0
// In L2-SVM case:
// upper_bound_i = INF
// D_ii = 1/(2*Cp) if y_i = 1
// D_ii = 1/(2*Cn) if y_i = -1
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in V (one weight vector per task)
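//
// A minimal sketch of one coordinate update for the generic problem
// above (illustrative pseudocode, not the MTL variant implemented below):
//
//   G         = y_i * <w, x_i> - 1 + D_ii * alpha_i    // dual gradient
//   alpha_new = min(max(alpha_i - G/QD_ii, 0), C_i)    // clipped Newton step
//   w        += (alpha_new - alpha_i) * y_i * x_i      // keep w = sum_j alpha_j y_j x_j
//   alpha_i   = alpha_new
//
// where QD_ii = x_i^T x_i + D_ii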
#undef GETI
#define GETI(i) (y[i]+1)
// GETI maps label y[i] in {-1,+1} to index 0 or 2 of diag/upper_bound
// To support weights for instances, use GETI(i) (i)
void CLibLinearMTL::solve_l2r_l1l2_svc(const liblinear_problem *prob, double eps, double Cp, double Cn)
{
int l = prob->l;
int w_size = prob->n;
int i, s, iter = 0;
double C, d, G;
double *QD = SG_MALLOC(double, l);
int *index = SG_MALLOC(int, l);
//double *alpha = SG_MALLOC(double, l);
int32_t *y = SG_MALLOC(int32_t, l);
int active_size = l;
// PG: projected gradient, for shrinking and stopping
double PG;
double PGmax_old = CMath::INFTY;
double PGmin_old = -CMath::INFTY;
double PGmax_new, PGmin_new;
// matrix V: one weight vector per task (w_size x num_tasks)
V = SGMatrix<float64_t>(w_size,num_tasks);
// save alpha
alphas = SGVector<float64_t>(l);
// L1-loss SVC dual: D_ii=0 and the alphas are upper-bounded by Cn/Cp
// (indexed via GETI: 0 for y_i=-1, 2 for y_i=+1)
double diag[3] = {0, 0, 0};
double upper_bound[3] = {Cn, 0, Cp};
int n = prob->n;
if (prob->use_bias)
n--;
// set V to zero
for(int32_t k=0; k<w_size*num_tasks; k++)
{
V.matrix[k] = 0;
}
// init alphas
for(i=0; i<l; i++)
{
alphas[i] = 0;
}
for(i=0; i<l; i++)
{
if(prob->y[i] > 0)
{
y[i] = +1;
}
else
{
y[i] = -1;
}
QD[i] = diag[GETI(i)];
QD[i] += prob->x->dot(i, prob->x,i);
index[i] = i;
}
auto pb = progress(range(10));
CTime start_time;
while (iter < max_iterations && !cancel_computation())
{
if (m_max_train_time > 0 && start_time.cur_time_diff() > m_max_train_time)
break;
PGmax_new = -CMath::INFTY;
PGmin_new = CMath::INFTY;
for (i=0; i<active_size; i++)
{
int j = CMath::random(i, active_size-1);
CMath::swap(index[i], index[j]);
}
for (s=0;s<active_size;s++)
{
i = index[s];
int32_t yi = y[i];
int32_t ti = task_indicator_lhs[i];
C = upper_bound[GETI(i)];
// we compute the inner sum by looping over tasks
// this update is the main result of MTL_DCD
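// i.e. the dual gradient is G_i = sum_s M[t_i,s] * y_i * <V_s, x_i> - 1,
// with M the (sparse) task similarity matrix and V_s the weight vector
// of task s (the -1 is added below, after the loop over tasks)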
typedef std::map<index_t, float64_t>::const_iterator map_iter;
float64_t inner_sum = 0;
for (map_iter it=task_similarity_matrix.data[ti].begin(); it!=task_similarity_matrix.data[ti].end(); it++)
{
// get data from sparse matrix
int32_t e_i = it->first;
float64_t sim = it->second;
// fetch vector
float64_t* tmp_w = V.get_column_vector(e_i);
inner_sum += sim * yi * prob->x->dense_dot(i, tmp_w, n);
//possibly deal with bias
//if (prob->use_bias)
// G+=w[n];
}
// compute gradient
G = inner_sum-1.0;
// check if point can be removed from active set
PG = 0;
if (alphas[i] == 0)
{
if (G > PGmax_old)
{
active_size--;
CMath::swap(index[s], index[active_size]);
s--;
continue;
}
else if (G < 0)
PG = G;
}
else if (alphas[i] == C)
{
if (G < PGmin_old)
{
active_size--;
CMath::swap(index[s], index[active_size]);
s--;
continue;
}
else if (G > 0)
PG = G;
}
else
PG = G;
PGmax_new = CMath::max(PGmax_new, PG);
PGmin_new = CMath::min(PGmin_new, PG);
if(fabs(PG) > 1.0e-12)
{
// save previous alpha
double alpha_old = alphas[i];
// project onto feasible set
alphas[i] = CMath::min(CMath::max(alphas[i] - G/QD[i], 0.0), C);
d = (alphas[i] - alpha_old)*yi;
// update corresponding weight vector
float64_t* tmp_w = V.get_column_vector(ti);
prob->x->add_to_dense_vec(d, i, tmp_w, n);
//if (prob->use_bias)
// w[n]+=d;
}
}
iter++;
float64_t gap=PGmax_new - PGmin_new;
pb.print_absolute(
gap, -CMath::log10(gap), -CMath::log10(1), -CMath::log10(eps));
if(gap <= eps)
{
if(active_size == l)
break;
else
{
active_size = l;
PGmax_old = CMath::INFTY;
PGmin_old = -CMath::INFTY;
continue;
}
}
PGmax_old = PGmax_new;
PGmin_old = PGmin_new;
if (PGmax_old <= 0)
PGmax_old = CMath::INFTY;
if (PGmin_old >= 0)
PGmin_old = -CMath::INFTY;
}
pb.complete_absolute();
SG_INFO("optimization finished, #iter = %d\n",iter)
if (iter >= max_iterations)
{
SG_WARNING("reaching max number of iterations\nUsing -s 2 may be faster "
"(also see liblinear FAQ)\n\n");
}
SG_FREE(QD);
//SG_FREE(alpha);
SG_FREE(y);
SG_FREE(index);
}
float64_t CLibLinearMTL::compute_primal_obj()
{
/* python prototype
num_param = param.shape[0]
num_dim = len(all_xt[0])
num_tasks = int(num_param / num_dim)
num_examples = len(all_xt)

# vector to matrix
W = param.reshape(num_tasks, num_dim)

obj = 0
reg_obj = 0
loss_obj = 0

assert len(all_xt) == len(all_lt) == len(task_indicator)

# L2 regularizer
for t in xrange(num_tasks):
    reg_obj += 0.5 * np.dot(W[t,:], W[t,:])

# MTL regularizer
for s in xrange(num_tasks):
    for t in xrange(num_tasks):
        reg_obj += 0.5 * L[s,t] * np.dot(W[s,:], W[t,:])

# loss
for i in xrange(num_examples):
    ti = task_indicator[i]
    t = all_lt[i] * np.dot(W[ti,:], all_xt[i])
    # hinge
    loss_obj += max(0, 1 - t)

# combine to final objective
obj = reg_obj + C * loss_obj
return obj
*/
SG_INFO("DONE to compute Primal OBJ\n")
// calculate objective value
SGMatrix<float64_t> W = get_W();
float64_t obj = 0;
int32_t num_vec = features->get_num_vectors();
int32_t w_size = features->get_dim_feature_space();
// L2 regularizer
for (int32_t t=0; t<num_tasks; t++)
{
float64_t* w_t = W.get_column_vector(t);
for(int32_t i=0; i<w_size; i++)
{
obj += 0.5 * w_t[i]*w_t[i];
}
}
// MTL regularizer
for (int32_t s=0; s<num_tasks; s++)
{
float64_t* w_s = W.get_column_vector(s);
for (int32_t t=0; t<num_tasks; t++)
{
float64_t* w_t = W.get_column_vector(t);
float64_t l = graph_laplacian.matrix[s*num_tasks+t];
for(int32_t i=0; i<w_size; i++)
{
obj += 0.5 * l * w_s[i]*w_t[i];
}
}
}
// loss
for(int32_t i=0; i<num_vec; i++)
{
int32_t ti = task_indicator_lhs[i];
float64_t* w_t = W.get_column_vector(ti);
float64_t residual = ((CBinaryLabels*)m_labels)->get_label(i) * features->dense_dot(i, w_t, w_size);
// hinge loss
obj += C1 * CMath::max(0.0, 1 - residual);
}
SG_INFO("DONE to compute Primal OBJ, obj=%f\n",obj)
return obj;
}
float64_t CLibLinearMTL::compute_dual_obj()
{
/* python prototype
num_xt = len(xt)

# linear term (matches the C++ below)
obj = np.sum(alphas)

# compute quadratic term
for i in xrange(num_xt):
    for j in xrange(num_xt):
        s = task_indicator[i]
        t = task_indicator[j]
        obj -= 0.5 * M[s,t] * alphas[i] * alphas[j] * lt[i] * lt[j] * np.dot(xt[i], xt[j])

return obj
*/
SG_INFO("starting to compute DUAL OBJ\n")
int32_t num_vec=features->get_num_vectors();
float64_t obj = 0;
// compute linear term
for(int32_t i=0; i<num_vec; i++)
{
obj += alphas[i];
}
// compute quadratic term
int32_t v_size = features->get_dim_feature_space();
// efficient computation
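// identity used here: with V_s = sum_{i: t_i=s} alpha_i y_i x_i, the O(l^2)
// double sum over examples (see the naive version below) collapses to a
// double sum over task pairs:
//   sum_{i,j} M[t_i,t_j] alpha_i alpha_j y_i y_j <x_i,x_j> = sum_{s,t} M[s,t] <V_s,V_t>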
for (int32_t s=0; s<num_tasks; s++)
{
float64_t* v_s = V.get_column_vector(s);
for (int32_t t=0; t<num_tasks; t++)
{
float64_t* v_t = V.get_column_vector(t);
const float64_t ts = task_similarity_matrix(s, t);
for(int32_t i=0; i<v_size; i++)
{
obj -= 0.5 * ts * v_s[i]*v_t[i];
}
}
}
/*
// naive O(l^2) implementation, kept for reference
float64_t tmp_val2 = 0;
for(int32_t i=0; i<num_vec; i++)
{
int32_t ti_i = task_indicator_lhs[i];
for(int32_t j=0; j<num_vec; j++)
{
// look up task similarity
int32_t ti_j = task_indicator_lhs[j];
const float64_t ts = task_similarity_matrix(ti_i, ti_j);
// compute objective
tmp_val2 -= 0.5 * alphas[i] * alphas[j] * ts * ((CBinaryLabels*)m_labels)->get_label(i) *
((CBinaryLabels*)m_labels)->get_label(j) * features->dot(i, features,j);
}
}
*/
return obj;
}
float64_t CLibLinearMTL::compute_duality_gap()
{
// not implemented: this stub always reports a zero duality gap
return 0.0;
}
#endif //HAVE_LAPACK