src/shogun/classifier/svm/NewtonSVM.cpp

/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2012 Harshit Syal
 * Copyright (C) 2012 Harshit Syal
 */
#include <shogun/lib/config.h>

#ifdef HAVE_LAPACK
#include <shogun/classifier/svm/NewtonSVM.h>
#include <shogun/mathematics/Math.h>
#include <shogun/machine/LinearMachine.h>
#include <shogun/features/DotFeatures.h>
#include <shogun/labels/Labels.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/mathematics/lapack.h>
#include <shogun/lib/Signal.h>

//#define DEBUG_NEWTON
//#define V_NEWTON
using namespace shogun;

/** Default constructor.
 *
 * Uses C=1 with the bias term enabled. The solver settings that
 * train_machine() reads (lambda, num_iter, prec) are set to the same values
 * the parameterised constructor uses, so that a default-constructed machine
 * never trains with indeterminate values.
 */
CNewtonSVM::CNewtonSVM()
: CLinearMachine(), C(1), use_bias(true)
{
	lambda=1.0/C;
	num_iter=20;
	prec=1e-6;
}

/** Constructor.
 *
 * @param c        regularisation constant C; lambda is set to 1/c
 * @param traindat training features
 * @param trainlab training labels
 * @param itr      maximum number of Newton iterations
 */
CNewtonSVM::CNewtonSVM(float64_t c, CDotFeatures* traindat, CLabels* trainlab, int32_t itr)
: CLinearMachine()
{
	C=c;
	lambda=1/c;
	// Honour the caller's iteration limit; it used to be overwritten
	// unconditionally with 20 right after being assigned.
	num_iter=itr;
	prec=1e-6;
	use_bias=true;
	set_features(traindat);
	set_labels(trainlab);
}


/** Destructor; this class releases nothing of its own here. */
CNewtonSVM::~CNewtonSVM()
{
}


/** Train the machine with Newton iterations.
 *
 * Every iteration evaluates objective and gradient (obj_fun_linear), builds a
 * (x_d+1)x(x_d+1) matrix from the current support vectors plus the lambda
 * regulariser, pseudo-inverts it, takes step = -inverse*grad, determines the
 * step length with line_search_linear and updates the weights. Iteration
 * stops when 2*newton_decrement < prec*obj, when num_iter iterations have been
 * done, or when the computation is cancelled.
 *
 * @param data optional training features; must have the FP_DOT property.
 *             If NULL the previously set features are used.
 * @return true (failures are reported through ASSERT / SG_ERROR)
 */
bool CNewtonSVM::train_machine(CFeatures* data)
{
	CSignal::clear_cancel();
	ASSERT(m_labels)
	ASSERT(m_labels->get_label_type() == LT_BINARY)

	if (data)
	{
		if (!data->has_property(FP_DOT))
			SG_ERROR("Specified features are not of type CDotFeatures\n")
		set_features((CDotFeatures*) data);
	}

	ASSERT(features)

	SGVector<float64_t> train_labels=((CBinaryLabels*) m_labels)->get_labels();
	int32_t num_feat=features->get_dim_feature_space();
	int32_t num_vec=features->get_num_vectors();

	//Assigning dimensions for whole class scope
	x_n=num_vec;
	x_d=num_feat;

	ASSERT(num_vec==train_labels.vlen)

	// weights holds x_d feature weights followed by the bias, all starting at 0
	float64_t* weights = SG_CALLOC(float64_t, x_d+1);
	// out starts at 1 for every training vector
	float64_t* out=SG_MALLOC(float64_t, x_n);
	SGVector<float64_t>::fill_vector(out, x_n, 1.0);

	int32_t *sv=SG_MALLOC(int32_t, x_n), size_sv=0, iter=0;
	float64_t obj, *grad=SG_MALLOC(float64_t, x_d+1);
	float64_t t;

	while(!CSignal::cancel_computations())
	{
		iter++;

		if (iter>num_iter)
		{
			SG_PRINT("Maximum number of Newton steps reached. Try larger lambda\n")
			break;
		}

		obj_fun_linear(weights, out, &obj, sv, &size_sv, grad);

#ifdef DEBUG_NEWTON
		SG_PRINT("fun linear passed !\n")
		SG_PRINT("Obj =%f\n", obj)
		SG_PRINT("Grad=\n")

		for (int32_t i=0; i<x_d+1; i++)
			SG_PRINT("grad[%d]=%.16g\n", i, grad[i])
		SG_PRINT("SV=\n")

		for (int32_t i=0; i<size_sv; i++)
			SG_PRINT("sv[%d]=%d\n", i, sv[i])
#endif

		// Gather the feature vectors of the current support vectors, one
		// vector after the other, then transpose; the code below indexes the
		// result as Xsv[i+j*size_sv] (support vector i, feature j).
		SGVector<float64_t> sgv;
		float64_t* Xsv = SG_MALLOC(float64_t, x_d*size_sv);
		for (int32_t k=0; k<size_sv; k++)
		{
			sgv=features->get_computed_dot_feature_vector(sv[k]);
			for (int32_t j=0; j<x_d; j++)
				Xsv[k*x_d+j]=sgv.vector[j];
		}
		int32_t tx=x_d;
		int32_t ty=size_sv;
		SGMatrix<float64_t>::transpose_matrix(Xsv, tx, ty);

#ifdef DEBUG_NEWTON
		SGMatrix<float64_t>::display_matrix(Xsv, x_d, size_sv);
#endif

		// Xsv2 = Xsv' * Xsv (x_d x x_d)
		float64_t* Xsv2=SG_MALLOC(float64_t, x_d*x_d);
		cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, x_d, x_d, size_sv,
				1.0, Xsv, size_sv, Xsv, size_sv, 0.0, Xsv2, x_d);

		// sum[j] = sum of feature j over all support vectors
		float64_t* sum=SG_CALLOC(float64_t, x_d);

		for (int32_t j=0; j<x_d; j++)
		{
			for (int32_t i=0; i<size_sv; i++)
				sum[j]+=Xsv[i+j*size_sv];
		}

		// Assemble the bordered matrix
		//   [ Xsv2  sum     ]
		//   [ sum'  size_sv ]
		int32_t r=x_d+1;
		float64_t* Xsv2sum=SG_MALLOC(float64_t, r*r);

		for (int32_t i=0; i<x_d; i++)
		{
			for (int32_t j=0; j<x_d; j++)
				Xsv2sum[j*r+i]=Xsv2[j*x_d+i];

			Xsv2sum[x_d*r+i]=sum[i];
			Xsv2sum[i*r+x_d]=sum[i];
		}

		Xsv2sum[x_d*r+x_d]=size_sv;

		// Add the regulariser: lambda on the x_d weight diagonal entries,
		// nothing on the bias entry. This yields the same entries as the
		// former dgemm of diag(lambda,...,lambda,0) with an identity matrix
		// accumulated onto Xsv2sum, without the two extra r*r matrices.
		for (int32_t i=0; i<x_d; i++)
			Xsv2sum[i*r+i]+=lambda;

		float64_t* inverse=SG_MALLOC(float64_t, r*r);
		SGMatrix<float64_t>::pinv(Xsv2sum, r, r, inverse);

		// step = -inverse*grad
		float64_t* step=SG_MALLOC(float64_t, r);
		float64_t* s2=SG_MALLOC(float64_t, r);
		cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, r, 1, r, 1.0,
				inverse, r, grad, r, 0.0, s2, r);

		for (int32_t i=0; i<r; i++)
			step[i]=-s2[i];

		// line search writes the step length to t and updates out
		line_search_linear(weights, step, out, &t);

#ifdef DEBUG_NEWTON
		SG_PRINT("t=%f\n\n", t)

		for (int32_t i=0; i<x_n; i++)
			SG_PRINT("out[%d]=%.16g\n", i, out[i])

		for (int32_t i=0; i<x_d+1; i++)
			SG_PRINT("weights[%d]=%.16g\n", i, weights[i])
#endif

		SGVector<float64_t>::vec1_plus_scalar_times_vec2(weights, t, step, r);

		// newton_decrement = -0.5 * step' * grad
		float64_t newton_decrement;
		cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, 1, 1, r, -0.5,
				step, r, grad, r, 0.0, &newton_decrement, 1);
#ifdef V_NEWTON
		SG_PRINT("Itr=%d, Obj=%f, No of sv=%d, Newton dec=%0.3f, line search=%0.3f\n\n",
				iter, obj, size_sv, newton_decrement, t);
#endif

		SG_FREE(Xsv);
		SG_FREE(Xsv2);
		// sum used to be leaked on every iteration
		SG_FREE(sum);
		SG_FREE(Xsv2sum);
		SG_FREE(inverse);
		SG_FREE(step);
		SG_FREE(s2);

		if (newton_decrement*2<prec*obj)
			break;
	}

#ifdef V_NEWTON
	SG_PRINT("FINAL W AND BIAS Vector=\n\n")
	CMath::display_matrix(weights, x_d+1, 1);
#endif

	// NOTE(review): weights is not freed here; presumably the SGVector handed
	// to set_w takes ownership of the buffer - confirm.
	set_w(SGVector<float64_t>(weights, x_d));
	set_bias(weights[x_d]);

	SG_FREE(sv);
	SG_FREE(grad);
	SG_FREE(out);

	return true;
}

/** One-dimensional Newton line search along direction d.
 *
 * Starting from t=0, repeatedly evaluates outz = out - t*(Y.*Xd), where
 * Xd[i] = <x_i, d[0..x_d)> + d[x_d], computes the first (g) and second (h)
 * derivative with respect to t over the entries with outz>0, and updates
 * t -= g/h until g*g/h < 1e-10.
 *
 * @param weights current weights; the first x_d entries are read
 * @param d       search direction, x_d weight entries followed by the bias entry
 * @param out     in: current outputs (x_n entries);
 *                out: overwritten with the last evaluated outz
 * @param tx      receives the step length t
 */
void CNewtonSVM::line_search_linear(float64_t* weights, float64_t* d, float64_t*
		out, float64_t* tx)
{
	SGVector<float64_t> Y=((CBinaryLabels*) m_labels)->get_labels();
	float64_t* outz=SG_MALLOC(float64_t, x_n);
	float64_t* yxd=SG_MALLOC(float64_t, x_n);
	float64_t* temp1=SG_MALLOC(float64_t, x_n);
	float64_t* temp1forout=SG_MALLOC(float64_t, x_n);
	float64_t* outzsv=SG_MALLOC(float64_t, x_n);
	float64_t* Ysv=SG_MALLOC(float64_t, x_n);
	float64_t* Xsv=SG_MALLOC(float64_t, x_n);
	float64_t t=0.0;
	float64_t* Xd=SG_MALLOC(float64_t, x_n);

	for (int32_t i=0; i<x_n; i++)
		Xd[i]=features->dense_dot(i, d, x_d);

	// add the bias component of the direction
	SGVector<float64_t>::add_scalar(d[x_d], Xd, x_n);

#ifdef DEBUG_NEWTON
	CMath::display_vector(d, x_d+1, "Weight vector");

	// Xd holds x_n entries
	for (int32_t i=0; i<x_n; i++)
		SG_SPRINT("Xd[%d]=%.18g\n", i, Xd[i])

	CMath::display_vector(Xd, x_n, "XD vector=");
#endif

	// Y.*Xd does not depend on t, so compute it once
	SGVector<float64_t>::vector_multiply(yxd, Y.vector, Xd, x_n);

	// wd = lambda*<w,d>, dd = lambda*<d,d>, both without the bias entry
	float64_t wd=lambda*CMath::dot(weights, d, x_d);
	float64_t dd=lambda*CMath::dot(d, d, x_d);
	float64_t tempg;

	float64_t g, h;
	int32_t sv_len=0, *sv=SG_MALLOC(int32_t, x_n);

	do
	{
		// outz = out - t*(Y.*Xd)
		memcpy(temp1forout, yxd, sizeof(float64_t)*x_n);
		SGVector<float64_t>::scale_vector(t, temp1forout, x_n);
		SGVector<float64_t>::add(outz, 1.0, out, -1.0, temp1forout, x_n);

		// Calculation of sv
		sv_len=0;

		for (int32_t i=0; i<x_n; i++)
		{
			if (outz[i]>0)
				sv[sv_len++]=i;
		}

		//Calculation of gradient 'g'
		for (int32_t i=0; i<sv_len; i++)
		{
			outzsv[i]=outz[sv[i]];
			Ysv[i]=Y.vector[sv[i]];
			Xsv[i]=Xd[sv[i]];
		}

		SGVector<float64_t>::vector_multiply(temp1, outzsv, Ysv, sv_len);
		tempg=CMath::dot(temp1, Xsv, sv_len);
		g=wd+(t*dd);
		g-=tempg;

		// Calculation of second derivative 'h'.
		// A plain dot product is used instead of a 1x1 dgemm: with
		// sv_len==0 the dgemm call would pass a leading dimension of 0,
		// which BLAS does not accept.
		h=CMath::dot(Xsv, Xsv, sv_len);
		h+=dd;

		// No curvature along d (e.g. d is zero): g/h would be NaN or inf
		// and the convergence test below could never succeed. Keep the
		// current t, which is the one outz was evaluated at.
		if (!(h>0))
			break;

		// Calculation of 1D Newton step 'd'
		t-=g/h;

		if (((g*g)/h)<1e-10)
			break;

	} while(1);

	for (int32_t i=0; i<x_n; i++)
		out[i]=outz[i];
	*tx=t;

	SG_FREE(sv);
	SG_FREE(temp1);
	SG_FREE(yxd);
	SG_FREE(temp1forout);
	SG_FREE(outz);
	SG_FREE(outzsv);
	SG_FREE(Ysv);
	SG_FREE(Xsv);
	SG_FREE(Xd);
}

/** Evaluate objective value, gradient and support vector set.
 *
 * @param weights x_d feature weights (the entry at x_d is not read)
 * @param out     x_n outputs; negative entries are set to 0 in place
 * @param obj     receives sum(out.^2)/2 + lambda/2 * <w,w> (bias excluded)
 * @param sv      receives the indices i with out[i]>0
 * @param numsv   receives the number of indices written to sv
 * @param grad    receives x_d+1 gradient entries (weights, then bias)
 */
void CNewtonSVM::obj_fun_linear(float64_t* weights, float64_t* out,
		float64_t* obj, int32_t* sv, int32_t* numsv, float64_t* grad)
{
	const int32_t dim=x_d+1;
	SGVector<float64_t> labels=((CBinaryLabels*) m_labels)->get_labels();

	// Clamp negative outputs to zero and, in the same pass, record every
	// index whose output is strictly positive.
	int32_t n_active=0;

	for (int32_t idx=0; idx<x_n; idx++)
	{
		if (out[idx]<0)
			out[idx]=0;
		else if (out[idx]>0)
			sv[n_active++]=idx;
	}

	*numsv=n_active;

#ifdef DEBUG_NEWTON
	for (int32_t i=0; i<x_n; i++)
		SG_SPRINT("out[%d]=%.16g\n", i, out[i])
#endif

	// Weight copy with the bias slot zeroed: the bias is not penalised
	float64_t* w_reg=SG_MALLOC(float64_t, dim);
	memcpy(w_reg, weights, sizeof(float64_t)*(x_d));
	w_reg[x_d]=0;

	// Half-scaled copy, so that the product below gives lambda/2 * <w,w>
	float64_t* w_half=SG_MALLOC(float64_t, dim);
	memcpy(w_half, w_reg, sizeof(float64_t)*dim);
	SGVector<float64_t>::scale_vector(0.5, w_half, dim);

	// Loss term: half the sum of squared outputs
	float64_t* out_sq=SG_MALLOC(float64_t, x_n);
	SGVector<float64_t>::vector_multiply(out_sq, out, out, x_n);
	float64_t loss_term=SGVector<float64_t>::sum(out_sq, x_n)/2;
	SG_FREE(out_sq);

	float64_t reg_term;
	cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, 1, 1, dim, lambda,
			w_reg, dim, w_half, dim, 0.0, &reg_term, 1);
	SG_FREE(w_half);

	*obj=loss_term+reg_term;

	// From here on w_reg holds lambda*w (bias slot stays 0)
	SGVector<float64_t>::scale_vector(lambda, w_reg, x_d);

	// out_y = out.*Y
	float64_t* out_y=SG_CALLOC(float64_t, x_n);
	SGVector<float64_t>::vector_multiply(out_y, out, labels.vector, x_n);

	// loss_grad[0..x_d) accumulates sum_i out_y[i]*x_i; the last slot
	// takes sum(out_y) for the bias.
	float64_t* loss_grad=SG_CALLOC(float64_t, dim);
	SGVector<float64_t> vec;

	for (int32_t i=0; i<x_n; i++)
	{
		features->add_to_dense_vec(out_y[i], i, loss_grad, x_d);
#ifdef DEBUG_NEWTON
		SG_SPRINT("\ntemp[%d]=%f", i, out_y[i])
		CMath::display_vector(vec.vector, x_d, "vector");
		CMath::display_vector(loss_grad, x_d, "debuging");
#endif
	}

	loss_grad[x_d]=SGVector<float64_t>::sum(out_y, x_n);

	// grad = lambda*w - loss_grad
	SGVector<float64_t>::add(grad, 1.0, w_reg, -1.0, loss_grad, dim);

	SG_FREE(loss_grad);
	SG_FREE(out_y);
	SG_FREE(w_reg);
}
#endif //HAVE_LAPACK