From 62677bb63d70b603cc99132c7ec0ac264fe9d954 Mon Sep 17 00:00:00 2001
From: Viktor Gal <viktor.gal@maeth.com>
Date: Mon, 29 Jan 2018 12:14:21 +0100
Subject: [PATCH] relicensed libqp and libocas to BSD

---
 src/gpl                                       |    2 +-
 src/shogun/classifier/svm/SVMOcas.cpp         |  362 +++
 src/shogun/classifier/svm/SVMOcas.h           |  239 ++
 src/shogun/classifier/svm/WDSVMOcas.cpp       |  641 ++++++
 src/shogun/classifier/svm/WDSVMOcas.h         |  371 +++
 src/shogun/latent/LatentSVM.cpp               |   74 +
 src/shogun/latent/LatentSVM.h                 |   74 +
 src/shogun/lib/external/libocas.cpp           | 2002 +++++++++++++++++
 src/shogun/lib/external/libocas.h             |  127 ++
 src/shogun/lib/external/libocas_common.h      |   19 +
 src/shogun/lib/external/libqp.h               |   73 +
 src/shogun/lib/external/libqp_gsmo.cpp        |  251 +++
 src/shogun/lib/external/libqp_splx.cpp        |  408 ++++
 src/shogun/multiclass/MulticlassOCAS.cpp      |  265 +++
 src/shogun/multiclass/MulticlassOCAS.h        |  157 ++
 tests/unit/classifier/svm/SVMOcas_unittest.cc |    9 +-
 tests/unit/latent/LatentSVM_unittest.cc       |    2 -
 .../multiclass/MulticlassOCAS_unittest.cc     |    5 +-
 18 files changed, 5068 insertions(+), 13 deletions(-)
 create mode 100644 src/shogun/classifier/svm/SVMOcas.cpp
 create mode 100644 src/shogun/classifier/svm/SVMOcas.h
 create mode 100644 src/shogun/classifier/svm/WDSVMOcas.cpp
 create mode 100644 src/shogun/classifier/svm/WDSVMOcas.h
 create mode 100644 src/shogun/latent/LatentSVM.cpp
 create mode 100644 src/shogun/latent/LatentSVM.h
 create mode 100644 src/shogun/lib/external/libocas.cpp
 create mode 100644 src/shogun/lib/external/libocas.h
 create mode 100644 src/shogun/lib/external/libocas_common.h
 create mode 100644 src/shogun/lib/external/libqp.h
 create mode 100644 src/shogun/lib/external/libqp_gsmo.cpp
 create mode 100644 src/shogun/lib/external/libqp_splx.cpp
 create mode 100644 src/shogun/multiclass/MulticlassOCAS.cpp
 create mode 100644 src/shogun/multiclass/MulticlassOCAS.h

diff --git a/src/gpl b/src/gpl
index 29ca2ff930c..0b5c4f4c616 160000
--- a/src/gpl
+++ b/src/gpl
@@ -1 +1 @@
-Subproject commit 29ca2ff930cd5d963b3a78e79160cc04d48970c2
+Subproject commit 0b5c4f4c616205c8765163c2dc03474656f0f7b6
diff --git a/src/shogun/classifier/svm/SVMOcas.cpp b/src/shogun/classifier/svm/SVMOcas.cpp
new file mode 100644
index 00000000000..d8266f00f94
--- /dev/null
+++ b/src/shogun/classifier/svm/SVMOcas.cpp
@@ -0,0 +1,362 @@
+/*
+ * This software is distributed under BSD 3-clause license (see LICENSE file).
+ *
+ * Authors: Vojtech Franc, Soeren Sonnenburg
+ */
+
+#include <shogun/classifier/svm/SVMOcas.h>
+
+#include <shogun/labels/Labels.h>
+#include <shogun/mathematics/Math.h>
+#include <shogun/lib/Time.h>
+#include <shogun/base/Parameter.h>
+#include <shogun/base/Parallel.h>
+#include <shogun/machine/LinearMachine.h>
+#include <shogun/features/DotFeatures.h>
+#include <shogun/labels/Labels.h>
+#include <shogun/labels/BinaryLabels.h>
+#include <shogun/mathematics/linalg/LinalgNamespace.h>
+
+using namespace shogun;
+
+CSVMOcas::CSVMOcas()
+: CLinearMachine()
+{
+	init();
+}
+
+CSVMOcas::CSVMOcas(E_SVM_TYPE type)
+: CLinearMachine()
+{
+	init();
+	method=type;
+}
+
+CSVMOcas::CSVMOcas(
+	float64_t C, CDotFeatures* traindat, CLabels* trainlab)
+: CLinearMachine()
+{
+	init();
+	C1=C;
+	C2=C;
+
+	set_features(traindat);
+	set_labels(trainlab);
+}
+
+
+CSVMOcas::~CSVMOcas()
+{
+}
+
+bool CSVMOcas::train_machine(CFeatures* data)
+{
+	SG_INFO("C=%f, epsilon=%f, bufsize=%d\n", get_C1(), get_epsilon(), bufsize)
+	SG_DEBUG("use_bias = %i\n", get_bias_enabled())
+
+	ASSERT(m_labels)
+	ASSERT(m_labels->get_label_type() == LT_BINARY)
+	if (data)
+	{
+		if (!data->has_property(FP_DOT))
+			SG_ERROR("Specified features are not of type CDotFeatures\n")
+		set_features((CDotFeatures*) data);
+	}
+	ASSERT(features)
+
+	int32_t num_vec=features->get_num_vectors();
+	lab = SGVector<float64_t>(num_vec);
+	for (int32_t i=0; i<num_vec; i++)
+		lab[i] = ((CBinaryLabels*)m_labels)->get_label(i);
+
+	current_w = SGVector<float64_t>(features->get_dim_feature_space());
+	current_w.zero();
+
+	if (num_vec!=lab.vlen || num_vec<=0)
+		SG_ERROR("num_vec=%d num_train_labels=%d\n", num_vec, lab.vlen)
+
+	SG_FREE(old_w);
+	old_w=SG_CALLOC(float64_t, current_w.vlen);
+	bias=0;
+	old_bias=0;
+
+	tmp_a_buf=SG_CALLOC(float64_t, current_w.vlen);
+	cp_value=SG_CALLOC(float64_t*, bufsize);
+	cp_index=SG_CALLOC(uint32_t*, bufsize);
+	cp_nz_dims=SG_CALLOC(uint32_t, bufsize);
+	cp_bias=SG_CALLOC(float64_t, bufsize);
+
+	float64_t TolAbs=0;
+	float64_t QPBound=0;
+	int32_t Method=0;
+	if (method == SVM_OCAS)
+		Method = 1;
+	ocas_return_value_T result = svm_ocas_solver( get_C1(), num_vec, get_epsilon(),
+			TolAbs, QPBound, get_max_train_time(), bufsize, Method,
+			&CSVMOcas::compute_W,
+			&CSVMOcas::update_W,
+			&CSVMOcas::add_new_cut,
+			&CSVMOcas::compute_output,
+			&CSVMOcas::sort,
+			&CSVMOcas::print,
+			this);
+
+	SG_INFO("Ocas Converged after %d iterations\n"
+			"==================================\n"
+			"timing statistics:\n"
+			"output_time: %f s\n"
+			"sort_time: %f s\n"
+			"add_time: %f s\n"
+			"w_time: %f s\n"
+			"solver_time %f s\n"
+			"ocas_time %f s\n\n", result.nIter, result.output_time, result.sort_time,
+			result.add_time, result.w_time, result.qp_solver_time, result.ocas_time);
+
+	SG_FREE(tmp_a_buf);
+
+	primal_objective = result.Q_P;
+
+	uint32_t num_cut_planes = result.nCutPlanes;
+
+	SG_DEBUG("num_cut_planes=%d\n", num_cut_planes)
+	for (uint32_t i=0; i<num_cut_planes; i++)
+	{
+		SG_DEBUG("cp_value[%d]=%p\n", i, cp_value)
+		SG_FREE(cp_value[i]);
+		SG_DEBUG("cp_index[%d]=%p\n", i, cp_index)
+		SG_FREE(cp_index[i]);
+	}
+
+	SG_FREE(cp_value);
+	cp_value=NULL;
+	SG_FREE(cp_index);
+	cp_index=NULL;
+	SG_FREE(cp_nz_dims);
+	cp_nz_dims=NULL;
+	SG_FREE(cp_bias);
+	cp_bias=NULL;
+
+	SG_FREE(old_w);
+	old_w=NULL;
+
+	set_w(current_w);
+
+	return true;
+}
+
+/*----------------------------------------------------------------------------------
+  sq_norm_W = sparse_update_W( t ) does the following:
+
+  W = oldW*(1-t) + t*W;
+  sq_norm_W = W'*W;
+
+  ---------------------------------------------------------------------------------*/
+float64_t CSVMOcas::update_W( float64_t t, void* ptr )
+{
+  float64_t sq_norm_W = 0;
+  CSVMOcas* o = (CSVMOcas*) ptr;
+  uint32_t nDim = (uint32_t) o->current_w.vlen;
+  float64_t* W = o->current_w.vector;
+  float64_t* oldW=o->old_w;
+
+  for(uint32_t j=0; j <nDim; j++)
+  {
+	  W[j] = oldW[j]*(1-t) + t*W[j];
+	  sq_norm_W += W[j]*W[j];
+  }
+  o->bias=o->old_bias*(1-t) + t*o->bias;
+  sq_norm_W += CMath::sq(o->bias);
+
+  return( sq_norm_W );
+}
+
+/*----------------------------------------------------------------------------------
+  sparse_add_new_cut( new_col_H, new_cut, cut_length, nSel ) does the following:
+
+    new_a = sum(data_X(:,find(new_cut ~=0 )),2);
+    new_col_H = [sparse_A(:,1:nSel)'*new_a ; new_a'*new_a];
+    sparse_A(:,nSel+1) = new_a;
+
+  ---------------------------------------------------------------------------------*/
+int CSVMOcas::add_new_cut(
+	float64_t *new_col_H, uint32_t *new_cut, uint32_t cut_length,
+	uint32_t nSel, void* ptr)
+{
+	CSVMOcas* o = (CSVMOcas*) ptr;
+	CDotFeatures* f = o->features;
+	uint32_t nDim=(uint32_t) o->current_w.vlen;
+	float64_t* y = o->lab.vector;
+
+	float64_t** c_val = o->cp_value;
+	uint32_t** c_idx = o->cp_index;
+	uint32_t* c_nzd = o->cp_nz_dims;
+	float64_t* c_bias = o->cp_bias;
+
+	float64_t sq_norm_a;
+	uint32_t i, j, nz_dims;
+
+	/* temporary vector */
+	float64_t* new_a = o->tmp_a_buf;
+	memset(new_a, 0, sizeof(float64_t)*nDim);
+
+	for(i=0; i < cut_length; i++)
+	{
+		f->add_to_dense_vec(y[new_cut[i]], new_cut[i], new_a, nDim);
+
+		if (o->use_bias)
+			c_bias[nSel]+=y[new_cut[i]];
+	}
+
+	/* compute new_a'*new_a and count number of non-zerou dimensions */
+	nz_dims = 0;
+	sq_norm_a = CMath::sq(c_bias[nSel]);
+	for(j=0; j < nDim; j++ ) {
+		if(new_a[j] != 0) {
+			nz_dims++;
+			sq_norm_a += new_a[j]*new_a[j];
+		}
+	}
+
+	/* sparsify new_a and insert it to the last column of sparse_A */
+	c_nzd[nSel] = nz_dims;
+	c_idx[nSel]=NULL;
+	c_val[nSel]=NULL;
+
+	if(nz_dims > 0)
+	{
+		c_idx[nSel]=SG_MALLOC(uint32_t, nz_dims);
+		c_val[nSel]=SG_MALLOC(float64_t, nz_dims);
+
+		uint32_t idx=0;
+		for(j=0; j < nDim; j++ )
+		{
+			if(new_a[j] != 0)
+			{
+				c_idx[nSel][idx] = j;
+				c_val[nSel][idx++] = new_a[j];
+			}
+		}
+	}
+
+	new_col_H[nSel] = sq_norm_a;
+
+	for(i=0; i < nSel; i++)
+	{
+		float64_t tmp = c_bias[nSel]*c_bias[i];
+		for(j=0; j < c_nzd[i]; j++)
+			tmp += new_a[c_idx[i][j]]*c_val[i][j];
+
+		new_col_H[i] = tmp;
+	}
+	//CMath::display_vector(new_col_H, nSel+1, "new_col_H");
+	//CMath::display_vector((int32_t*) c_idx[nSel], (int32_t) nz_dims, "c_idx");
+	//CMath::display_vector((float64_t*) c_val[nSel], nz_dims, "c_val");
+	return 0;
+}
+
+int CSVMOcas::sort(float64_t* vals, float64_t* data, uint32_t size)
+{
+	CMath::qsort_index(vals, data, size);
+	return 0;
+}
+
+/*----------------------------------------------------------------------
+  sparse_compute_output( output ) does the follwing:
+
+  output = data_X'*W;
+  ----------------------------------------------------------------------*/
+int CSVMOcas::compute_output(float64_t *output, void* ptr)
+{
+	CSVMOcas* o = (CSVMOcas*) ptr;
+	CDotFeatures* f=o->features;
+	int32_t nData=f->get_num_vectors();
+
+	float64_t* y = o->lab.vector;
+
+	f->dense_dot_range(output, 0, nData, y, o->current_w.vector, o->current_w.vlen, 0.0);
+
+	for (int32_t i=0; i<nData; i++)
+		output[i]+=y[i]*o->bias;
+	return 0;
+}
+
+/*----------------------------------------------------------------------
+  sq_norm_W = compute_W( alpha, nSel ) does the following:
+
+  oldW = W;
+  W = sparse_A(:,1:nSel)'*alpha;
+  sq_norm_W = W'*W;
+  dp_WoldW = W'*oldW';
+
+  ----------------------------------------------------------------------*/
+void CSVMOcas::compute_W(
+	float64_t *sq_norm_W, float64_t *dp_WoldW, float64_t *alpha, uint32_t nSel,
+	void* ptr )
+{
+	CSVMOcas* o = (CSVMOcas*) ptr;
+	uint32_t nDim= (uint32_t) o->current_w.vlen;
+	CMath::swap(o->current_w.vector, o->old_w);
+	SGVector<float64_t> W(o->current_w.vector, nDim, false);
+	SGVector<float64_t> oldW(o->old_w, nDim, false);
+	linalg::zero(W);
+	float64_t old_bias=o->bias;
+	float64_t bias=0;
+
+	float64_t** c_val = o->cp_value;
+	uint32_t** c_idx = o->cp_index;
+	uint32_t* c_nzd = o->cp_nz_dims;
+	float64_t* c_bias = o->cp_bias;
+
+	for(uint32_t i=0; i<nSel; i++)
+	{
+		uint32_t nz_dims = c_nzd[i];
+
+		if(nz_dims > 0 && alpha[i] > 0)
+		{
+			for(uint32_t j=0; j < nz_dims; j++)
+				W[c_idx[i][j]] += alpha[i]*c_val[i][j];
+		}
+		bias += c_bias[i]*alpha[i];
+	}
+
+	*sq_norm_W = linalg::dot(W, W) + CMath::sq(bias);
+	*dp_WoldW = linalg::dot(W, oldW) + bias*old_bias;
+	//SG_PRINT("nSel=%d sq_norm_W=%f dp_WoldW=%f\n", nSel, *sq_norm_W, *dp_WoldW)
+
+	o->bias = bias;
+	o->old_bias = old_bias;
+}
+
+void CSVMOcas::init()
+{
+	use_bias=true;
+	bufsize=3000;
+	C1=1;
+	C2=1;
+
+	epsilon=1e-3;
+	method=SVM_OCAS;
+	old_w=NULL;
+	tmp_a_buf=NULL;
+	cp_value=NULL;
+	cp_index=NULL;
+	cp_nz_dims=NULL;
+	cp_bias=NULL;
+
+	primal_objective = 0.0;
+
+    m_parameters->add(&C1, "C1",  "Cost constant 1.");
+    m_parameters->add(&C2, "C2",  "Cost constant 2.");
+    m_parameters->add(&use_bias, "use_bias",
+			"Indicates if bias is used.");
+    m_parameters->add(&epsilon, "epsilon", "Convergence precision.");
+    m_parameters->add(&bufsize, "bufsize", "Maximum number of cutting planes.");
+    m_parameters->add((machine_int_t*) &method, "method",
+			"SVMOcas solver type.");
+}
+
+float64_t CSVMOcas::compute_primal_objective() const
+{
+	return primal_objective;
+}
+
diff --git a/src/shogun/classifier/svm/SVMOcas.h b/src/shogun/classifier/svm/SVMOcas.h
new file mode 100644
index 00000000000..0f04ebcba55
--- /dev/null
+++ b/src/shogun/classifier/svm/SVMOcas.h
@@ -0,0 +1,239 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Written (W) 2007-2009 Vojtech Franc
+ * Written (W) 2007-2009 Soeren Sonnenburg
+ * Copyright (C) 2007-2009 Fraunhofer Institute FIRST and Max-Planck-Society
+ */
+
+
+#ifndef _SVMOCAS_H___
+#define _SVMOCAS_H___
+
+#include <shogun/lib/config.h>
+
+#include <shogun/lib/common.h>
+#include <shogun/machine/LinearMachine.h>
+#include <shogun/lib/external/libocas.h>
+#include <shogun/features/DotFeatures.h>
+#include <shogun/labels/Labels.h>
+
+namespace shogun
+{
+#ifndef DOXYGEN_SHOULD_SKIP_THIS
+enum E_SVM_TYPE
+{
+	SVM_OCAS = 0,
+	SVM_BMRM = 1
+};
+#endif
+
+/** @brief class SVMOcas */
+class CSVMOcas : public CLinearMachine
+{
+	public:
+
+		/** problem type */
+		MACHINE_PROBLEM_TYPE(PT_BINARY);
+
+		/** default constructor  */
+		CSVMOcas();
+
+		/** constructor
+		 *
+		 * @param type a E_SVM_TYPE
+		 */
+		CSVMOcas(E_SVM_TYPE type);
+
+		/** constructor
+		 *
+		 * @param C constant C
+		 * @param traindat training features
+		 * @param trainlab labels for training features
+		 */
+		CSVMOcas(
+			float64_t C, CDotFeatures* traindat,
+			CLabels* trainlab);
+		virtual ~CSVMOcas();
+
+		/** get classifier type
+		 *
+		 * @return classifier type SVMOCAS
+		 */
+		virtual EMachineType get_classifier_type() { return CT_SVMOCAS; }
+
+		/** set C
+		 *
+		 * @param c_neg new C constant for negatively labeled examples
+		 * @param c_pos new C constant for positively labeled examples
+		 *
+		 */
+		inline void set_C(float64_t c_neg, float64_t c_pos) { C1=c_neg; C2=c_pos; }
+
+		/** get C1
+		 *
+		 * @return C1
+		 */
+		inline float64_t get_C1() { return C1; }
+
+		/** get C2
+		 *
+		 * @return C2
+		 */
+		inline float64_t get_C2() { return C2; }
+
+		/** set epsilon
+		 *
+		 * @param eps new epsilon
+		 */
+		inline void set_epsilon(float64_t eps) { epsilon=eps; }
+
+		/** get epsilon
+		 *
+		 * @return epsilon
+		 */
+		inline float64_t get_epsilon() { return epsilon; }
+
+		/** set if bias shall be enabled
+		 *
+		 * @param enable_bias if bias shall be enabled
+		 */
+		inline void set_bias_enabled(bool enable_bias) { use_bias=enable_bias; }
+
+		/** check if bias is enabled
+		 *
+		 * @return if bias is enabled
+		 */
+		inline bool get_bias_enabled() { return use_bias; }
+
+		/** set buffer size
+		 *
+		 * @param sz buffer size
+		 */
+		inline void set_bufsize(int32_t sz) { bufsize=sz; }
+
+		/** get buffer size
+		 *
+		 * @return buffer size
+		 */
+		inline int32_t get_bufsize() { return bufsize; }
+
+		/** compute the primal objective value
+		 *
+		 * @return the primal objective
+		 */
+		virtual float64_t compute_primal_objective() const;
+
+	protected:
+		/** compute W
+		 *
+		 * @param sq_norm_W square normed W
+		 * @param dp_WoldW dp W old W
+		 * @param alpha alpha
+		 * @param nSel nSel
+		 * @param ptr ptr
+		 */
+		static void compute_W(
+			float64_t *sq_norm_W, float64_t *dp_WoldW, float64_t *alpha,
+			uint32_t nSel, void* ptr);
+
+		/** update W
+		 *
+		 * @param t t
+		 * @param ptr ptr
+		 * @return something floaty
+		 */
+		static float64_t update_W(float64_t t, void* ptr );
+
+		/** add new cut
+		 *
+		 * @param new_col_H new col H
+		 * @param new_cut new cut
+		 * @param cut_length length of cut
+		 * @param nSel nSel
+		 * @param ptr ptr
+		 */
+		static int add_new_cut(
+			float64_t *new_col_H, uint32_t *new_cut, uint32_t cut_length,
+			uint32_t nSel, void* ptr );
+
+		/** compute output
+		 *
+		 * @param output output
+		 * @param ptr ptr
+		 */
+		static int compute_output( float64_t *output, void* ptr );
+
+		/** sort
+		 *
+		 * @param vals vals
+		 * @param data data
+		 * @param size size
+		 */
+		static int sort( float64_t* vals, float64_t* data, uint32_t size);
+
+		/** print nothing */
+		static inline void print(ocas_return_value_T value)
+		{
+			  return;
+		}
+
+	protected:
+		/** train SVM classifier
+		 *
+		 * @param data training data (parameter can be avoided if distance or
+		 * kernel-based classifiers are used and distance/kernels are
+		 * initialized with train data)
+		 *
+		 * @return whether training was successful
+		 */
+		virtual bool train_machine(CFeatures* data=NULL);
+
+		/** @return object name */
+		inline const char* get_name() const { return "SVMOcas"; }
+	private:
+		void init();
+
+	protected:
+		/** if bias is used */
+		bool use_bias;
+		/** buffer size */
+		int32_t bufsize;
+		/** C1 */
+		float64_t C1;
+		/** C2 */
+		float64_t C2;
+		/** epsilon */
+		float64_t epsilon;
+		/** method */
+		E_SVM_TYPE method;
+
+		/** current W */
+		SGVector<float64_t> current_w;
+		/** old W */
+		float64_t* old_w;
+		/** old bias */
+		float64_t old_bias;
+		/** nDim big */
+		float64_t* tmp_a_buf;
+		/** labels */
+		SGVector<float64_t> lab;
+
+		/** sparse representation of
+		 * cutting planes */
+		float64_t** cp_value;
+		/** cutting plane index */
+		uint32_t** cp_index;
+		/** cutting plane dimensions */
+		uint32_t* cp_nz_dims;
+		/** bias dimensions */
+		float64_t* cp_bias;
+
+		/** primal objective */
+		float64_t primal_objective;
+};
+}
+#endif
diff --git a/src/shogun/classifier/svm/WDSVMOcas.cpp b/src/shogun/classifier/svm/WDSVMOcas.cpp
new file mode 100644
index 00000000000..b4c538321aa
--- /dev/null
+++ b/src/shogun/classifier/svm/WDSVMOcas.cpp
@@ -0,0 +1,641 @@
+/*
+ * This software is distributed under BSD 3-clause license (see LICENSE file).
+ *
+ * Authors: Vojtech Franc, Soeren Sonnenburg
+ */
+
+#include <shogun/classifier/svm/WDSVMOcas.h>
+
+#include <shogun/labels/Labels.h>
+#include <shogun/mathematics/Math.h>
+#include <shogun/mathematics/linalg/LinalgNamespace.h>
+#include <shogun/lib/DynamicArray.h>
+#include <shogun/lib/Time.h>
+#include <shogun/base/Parallel.h>
+#include <shogun/machine/Machine.h>
+#include <shogun/lib/external/libocas.h>
+#include <shogun/features/StringFeatures.h>
+#include <shogun/features/Alphabet.h>
+#include <shogun/labels/Labels.h>
+#include <shogun/labels/BinaryLabels.h>
+
+using namespace shogun;
+
+#ifndef DOXYGEN_SHOULD_SKIP_THIS
+struct wdocas_thread_params_output
+{
+	float32_t* out;
+	int32_t* val;
+	float64_t* output;
+	CWDSVMOcas* wdocas;
+	int32_t start;
+	int32_t end;
+};
+
+struct wdocas_thread_params_add
+{
+	CWDSVMOcas* wdocas;
+	float32_t* new_a;
+	uint32_t* new_cut;
+	int32_t start;
+	int32_t end;
+	uint32_t cut_length;
+};
+#endif // DOXYGEN_SHOULD_SKIP_THIS
+
+CWDSVMOcas::CWDSVMOcas()
+: CMachine(), use_bias(false), bufsize(3000), C1(1), C2(1),
+	epsilon(1e-3), method(SVM_OCAS)
+{
+	SG_UNSTABLE("CWDSVMOcas::CWDSVMOcas()", "\n")
+
+	w=NULL;
+	old_w=NULL;
+	features=NULL;
+	degree=6;
+	from_degree=40;
+	wd_weights=NULL;
+	w_offsets=NULL;
+	normalization_const=1.0;
+}
+
+CWDSVMOcas::CWDSVMOcas(E_SVM_TYPE type)
+: CMachine(), use_bias(false), bufsize(3000), C1(1), C2(1),
+	epsilon(1e-3), method(type)
+{
+	w=NULL;
+	old_w=NULL;
+	features=NULL;
+	degree=6;
+	from_degree=40;
+	wd_weights=NULL;
+	w_offsets=NULL;
+	normalization_const=1.0;
+}
+
+CWDSVMOcas::CWDSVMOcas(
+	float64_t C, int32_t d, int32_t from_d, CStringFeatures<uint8_t>* traindat,
+	CLabels* trainlab)
+: CMachine(), use_bias(false), bufsize(3000), C1(C), C2(C), epsilon(1e-3),
+	degree(d), from_degree(from_d)
+{
+	w=NULL;
+	old_w=NULL;
+	method=SVM_OCAS;
+	features=traindat;
+	set_labels(trainlab);
+	wd_weights=NULL;
+	w_offsets=NULL;
+	normalization_const=1.0;
+}
+
+
+CWDSVMOcas::~CWDSVMOcas()
+{
+}
+
+CBinaryLabels* CWDSVMOcas::apply_binary(CFeatures* data)
+{
+	SGVector<float64_t> outputs = apply_get_outputs(data);
+	return new CBinaryLabels(outputs);
+}
+
+CRegressionLabels* CWDSVMOcas::apply_regression(CFeatures* data)
+{
+	SGVector<float64_t> outputs = apply_get_outputs(data);
+	return new CRegressionLabels(outputs);
+}
+
+SGVector<float64_t> CWDSVMOcas::apply_get_outputs(CFeatures* data)
+{
+	if (data)
+	{
+		if (data->get_feature_class() != C_STRING ||
+				data->get_feature_type() != F_BYTE)
+		{
+			SG_ERROR("Features not of class string type byte\n")
+		}
+
+		set_features((CStringFeatures<uint8_t>*) data);
+	}
+	ASSERT(features)
+
+	set_wd_weights();
+	set_normalization_const();
+
+	SGVector<float64_t> outputs;
+	if (features)
+	{
+		int32_t num=features->get_num_vectors();
+		ASSERT(num>0)
+
+		outputs = SGVector<float64_t>(num);
+
+		for (int32_t i=0; i<num; i++)
+			outputs[i] = apply_one(i);
+	}
+
+	return outputs;
+}
+
+int32_t CWDSVMOcas::set_wd_weights()
+{
+	ASSERT(degree>0 && degree<=8)
+	SG_FREE(wd_weights);
+	wd_weights=SG_MALLOC(float32_t, degree);
+	SG_FREE(w_offsets);
+	w_offsets=SG_MALLOC(int32_t, degree);
+	int32_t w_dim_single_c=0;
+
+	for (int32_t i=0; i<degree; i++)
+	{
+		w_offsets[i]=CMath::pow(alphabet_size, i+1);
+		wd_weights[i]=sqrt(2.0*(from_degree-i)/(from_degree*(from_degree+1)));
+		w_dim_single_c+=w_offsets[i];
+	}
+	return w_dim_single_c;
+}
+
+bool CWDSVMOcas::train_machine(CFeatures* data)
+{
+	SG_INFO("C=%f, epsilon=%f, bufsize=%d\n", get_C1(), get_epsilon(), bufsize)
+
+	ASSERT(m_labels)
+	ASSERT(m_labels->get_label_type() == LT_BINARY)
+	if (data)
+	{
+		if (data->get_feature_class() != C_STRING ||
+				data->get_feature_type() != F_BYTE)
+		{
+			SG_ERROR("Features not of class string type byte\n")
+		}
+		set_features((CStringFeatures<uint8_t>*) data);
+	}
+
+	ASSERT(get_features())
+	CAlphabet* alphabet=get_features()->get_alphabet();
+	ASSERT(alphabet && alphabet->get_alphabet()==RAWDNA)
+
+	alphabet_size=alphabet->get_num_symbols();
+	string_length=features->get_num_vectors();
+	SGVector<float64_t> labvec=((CBinaryLabels*) m_labels)->get_labels();
+	lab=labvec.vector;
+
+	w_dim_single_char=set_wd_weights();
+	//CMath::display_vector(wd_weights, degree, "wd_weights");
+	SG_DEBUG("w_dim_single_char=%d\n", w_dim_single_char)
+	w_dim=string_length*w_dim_single_char;
+	SG_DEBUG("cutting plane has %d dims\n", w_dim)
+	num_vec=get_features()->get_max_vector_length();
+
+	set_normalization_const();
+	SG_INFO("num_vec: %d num_lab: %d\n", num_vec, labvec.vlen)
+	ASSERT(num_vec==labvec.vlen)
+	ASSERT(num_vec>0)
+
+	SG_FREE(w);
+	w=SG_MALLOC(float32_t, w_dim);
+	memset(w, 0, w_dim*sizeof(float32_t));
+
+	SG_FREE(old_w);
+	old_w=SG_MALLOC(float32_t, w_dim);
+	memset(old_w, 0, w_dim*sizeof(float32_t));
+	bias=0;
+	old_bias=0;
+
+	cuts=SG_MALLOC(float32_t*, bufsize);
+	memset(cuts, 0, sizeof(*cuts)*bufsize);
+	cp_bias=SG_MALLOC(float64_t, bufsize);
+	memset(cp_bias, 0, sizeof(float64_t)*bufsize);
+
+/////speed tests/////
+	/*float64_t* tmp = SG_MALLOC(float64_t, num_vec);
+	float64_t start=CTime::get_curtime();
+	CMath::random_vector(w, w_dim, (float32_t) 0, (float32_t) 1000);
+	compute_output(tmp, this);
+	start=CTime::get_curtime()-start;
+	SG_PRINT("timing:%f\n", start)
+	SG_FREE(tmp);
+	exit(1);*/
+/////speed tests/////
+	float64_t TolAbs=0;
+	float64_t QPBound=0;
+	uint8_t Method=0;
+	if (method == SVM_OCAS)
+		Method = 1;
+	ocas_return_value_T result = svm_ocas_solver( get_C1(), num_vec, get_epsilon(),
+			TolAbs, QPBound, get_max_train_time(), bufsize, Method,
+			&CWDSVMOcas::compute_W,
+			&CWDSVMOcas::update_W,
+			&CWDSVMOcas::add_new_cut,
+			&CWDSVMOcas::compute_output,
+			&CWDSVMOcas::sort,
+			&CWDSVMOcas::print,
+			this);
+
+	SG_INFO("Ocas Converged after %d iterations\n"
+			"==================================\n"
+			"timing statistics:\n"
+			"output_time: %f s\n"
+			"sort_time: %f s\n"
+			"add_time: %f s\n"
+			"w_time: %f s\n"
+			"solver_time %f s\n"
+			"ocas_time %f s\n\n", result.nIter, result.output_time, result.sort_time,
+			result.add_time, result.w_time, result.qp_solver_time, result.ocas_time);
+
+	for (int32_t i=bufsize-1; i>=0; i--)
+		SG_FREE(cuts[i]);
+	SG_FREE(cuts);
+
+	lab=NULL;
+	SG_UNREF(alphabet);
+
+	return true;
+}
+
+/*----------------------------------------------------------------------------------
+  sq_norm_W = sparse_update_W( t ) does the following:
+
+  W = oldW*(1-t) + t*W;
+  sq_norm_W = W'*W;
+
+  ---------------------------------------------------------------------------------*/
+float64_t CWDSVMOcas::update_W( float64_t t, void* ptr )
+{
+  float64_t sq_norm_W = 0;
+  CWDSVMOcas* o = (CWDSVMOcas*) ptr;
+  uint32_t nDim = (uint32_t) o->w_dim;
+  float32_t* W=o->w;
+  float32_t* oldW=o->old_w;
+  float64_t bias=o->bias;
+  float64_t old_bias=bias;
+
+  for(uint32_t j=0; j <nDim; j++)
+  {
+	  W[j] = oldW[j]*(1-t) + t*W[j];
+	  sq_norm_W += W[j]*W[j];
+  }
+
+  bias=old_bias*(1-t) + t*bias;
+  sq_norm_W += CMath::sq(bias);
+
+  o->bias=bias;
+  o->old_bias=old_bias;
+
+  return( sq_norm_W );
+}
+
+/*----------------------------------------------------------------------------------
+  sparse_add_new_cut( new_col_H, new_cut, cut_length, nSel ) does the following:
+
+    new_a = sum(data_X(:,find(new_cut ~=0 )),2);
+    new_col_H = [sparse_A(:,1:nSel)'*new_a ; new_a'*new_a];
+    sparse_A(:,nSel+1) = new_a;
+
+  ---------------------------------------------------------------------------------*/
+void* CWDSVMOcas::add_new_cut_helper( void* ptr)
+{
+	wdocas_thread_params_add* p = (wdocas_thread_params_add*) ptr;
+	CWDSVMOcas* o = p->wdocas;
+	int32_t start = p->start;
+	int32_t end = p->end;
+	int32_t string_length = o->string_length;
+	//uint32_t nDim=(uint32_t) o->w_dim;
+	uint32_t cut_length=p->cut_length;
+	uint32_t* new_cut=p->new_cut;
+	int32_t* w_offsets = o->w_offsets;
+	float64_t* y = o->lab;
+	int32_t alphabet_size = o->alphabet_size;
+	float32_t* wd_weights = o->wd_weights;
+	int32_t degree = o->degree;
+	CStringFeatures<uint8_t>* f = o->features;
+	float64_t normalization_const = o->normalization_const;
+
+	// temporary vector
+	float32_t* new_a = p->new_a;
+	//float32_t* new_a = SG_MALLOC(float32_t, nDim);
+	//memset(new_a, 0, sizeof(float32_t)*nDim);
+
+	int32_t* val=SG_MALLOC(int32_t, cut_length);
+	for (int32_t j=start; j<end; j++)
+	{
+		int32_t offs=o->w_dim_single_char*j;
+		memset(val,0,sizeof(int32_t)*cut_length);
+		int32_t lim=CMath::min(degree, string_length-j);
+		int32_t len;
+
+		for (int32_t k=0; k<lim; k++)
+		{
+			bool free_vec;
+			uint8_t* vec = f->get_feature_vector(j+k, len, free_vec);
+			float32_t wd = wd_weights[k]/normalization_const;
+
+			for(uint32_t i=0; i < cut_length; i++)
+			{
+				val[i]=val[i]*alphabet_size + vec[new_cut[i]];
+				new_a[offs+val[i]]+=wd * y[new_cut[i]];
+			}
+			offs+=w_offsets[k];
+			f->free_feature_vector(vec, j+k, free_vec);
+		}
+	}
+
+	//p->new_a=new_a;
+	SG_FREE(val);
+	return NULL;
+}
+
+int CWDSVMOcas::add_new_cut(
+	float64_t *new_col_H, uint32_t *new_cut, uint32_t cut_length,
+	uint32_t nSel, void* ptr)
+{
+	CWDSVMOcas* o = (CWDSVMOcas*) ptr;
+	uint32_t i;
+	float64_t* c_bias = o->cp_bias;
+	uint32_t nDim=(uint32_t) o->w_dim;
+	float32_t** cuts=o->cuts;
+	SGVector<float32_t> new_a(nDim);
+#ifdef HAVE_PTHREAD
+
+	wdocas_thread_params_add* params_add=SG_MALLOC(wdocas_thread_params_add, o->parallel->get_num_threads());
+	pthread_t* threads=SG_MALLOC(pthread_t, o->parallel->get_num_threads());
+
+	int32_t string_length = o->string_length;
+	int32_t t;
+	int32_t nthreads=o->parallel->get_num_threads()-1;
+	int32_t step= string_length/o->parallel->get_num_threads();
+
+	if (step<1)
+	{
+		nthreads=string_length-1;
+		step=1;
+	}
+
+	for (t=0; t<nthreads; t++)
+	{
+		params_add[t].wdocas=o;
+		//params_add[t].new_a=NULL;
+		params_add[t].new_a=new_a.vector;
+		params_add[t].new_cut=new_cut;
+		params_add[t].start = step*t;
+		params_add[t].end = step*(t+1);
+		params_add[t].cut_length = cut_length;
+
+		if (pthread_create(&threads[t], NULL, &CWDSVMOcas::add_new_cut_helper, (void*)&params_add[t]) != 0)
+		{
+			nthreads=t;
+			SG_SWARNING("thread creation failed\n")
+			break;
+		}
+	}
+
+	params_add[t].wdocas=o;
+	//params_add[t].new_a=NULL;
+	params_add[t].new_a=new_a.vector;
+	params_add[t].new_cut=new_cut;
+	params_add[t].start = step*t;
+	params_add[t].end = string_length;
+	params_add[t].cut_length = cut_length;
+	add_new_cut_helper(&params_add[t]);
+	//float32_t* new_a=params_add[t].new_a;
+
+	for (t=0; t<nthreads; t++)
+	{
+		if (pthread_join(threads[t], NULL) != 0)
+			SG_SWARNING("pthread_join failed\n")
+
+		//float32_t* a=params_add[t].new_a;
+		//for (i=0; i<nDim; i++)
+		//	new_a[i]+=a[i];
+		//SG_FREE(a);
+	}
+	SG_FREE(threads);
+	SG_FREE(params_add);
+#endif /* HAVE_PTHREAD */
+	for(i=0; i < cut_length; i++)
+	{
+		if (o->use_bias)
+			c_bias[nSel]+=o->lab[new_cut[i]];
+	}
+
+	// insert new_a into the last column of sparse_A
+	for(i=0; i < nSel; i++)
+	{
+		SGVector<float32_t> cut_wrap(cuts[i], nDim, false);
+		new_col_H[i] = linalg::dot(new_a, cut_wrap) + c_bias[nSel]*c_bias[i];
+	}
+	new_col_H[nSel] = linalg::dot(new_a, new_a) + CMath::sq(c_bias[nSel]);
+
+	cuts[nSel]=new_a;
+	//CMath::display_vector(new_col_H, nSel+1, "new_col_H");
+	//CMath::display_vector(cuts[nSel], nDim, "cut[nSel]");
+	//
+
+	return 0;
+}
+
+int CWDSVMOcas::sort( float64_t* vals, float64_t* data, uint32_t size)
+{
+	CMath::qsort_index(vals, data, size);
+	return 0;
+}
+
+/*----------------------------------------------------------------------
+  sparse_compute_output( output ) does the follwing:
+
+  output = data_X'*W;
+  ----------------------------------------------------------------------*/
+void* CWDSVMOcas::compute_output_helper(void* ptr)
+{
+	wdocas_thread_params_output* p = (wdocas_thread_params_output*) ptr;
+	CWDSVMOcas* o = p->wdocas;
+	int32_t start = p->start;
+	int32_t end = p->end;
+	float32_t* out = p->out;
+	float64_t* output = p->output;
+	int32_t* val = p->val;
+
+	CStringFeatures<uint8_t>* f=o->get_features();
+
+	int32_t degree = o->degree;
+	int32_t string_length = o->string_length;
+	int32_t alphabet_size = o->alphabet_size;
+	int32_t* w_offsets = o->w_offsets;
+	float32_t* wd_weights = o->wd_weights;
+	float32_t* w= o->w;
+
+	float64_t* y = o->lab;
+	float64_t normalization_const = o->normalization_const;
+
+
+	for (int32_t j=0; j<string_length; j++)
+	{
+		int32_t offs=o->w_dim_single_char*j;
+		for (int32_t i=start ; i<end; i++)
+			val[i]=0;
+
+		int32_t lim=CMath::min(degree, string_length-j);
+		int32_t len;
+
+		for (int32_t k=0; k<lim; k++)
+		{
+			bool free_vec;
+			uint8_t* vec=f->get_feature_vector(j+k, len, free_vec);
+			float32_t wd = wd_weights[k];
+
+			for (int32_t i=start; i<end; i++) // quite fast 1.9s
+			{
+				val[i]=val[i]*alphabet_size + vec[i];
+				out[i]+=wd*w[offs+val[i]];
+			}
+
+			/*for (int32_t i=0; i<nData/4; i++) // slowest 2s
+			{
+				uint32_t x=((uint32_t*) vec)[i];
+				int32_t ii=4*i;
+				val[ii]=val[ii]*alphabet_size + (x&255);
+				val[ii+1]=val[ii+1]*alphabet_size + ((x>>8)&255);
+				val[ii+2]=val[ii+2]*alphabet_size + ((x>>16)&255);
+				val[ii+3]=val[ii+3]*alphabet_size + (x>>24);
+				out[ii]+=wd*w[offs+val[ii]];
+				out[ii+1]+=wd*w[offs+val[ii+1]];
+				out[ii+2]+=wd*w[offs+val[ii+2]];
+				out[ii+3]+=wd*w[offs+val[ii+3]];
+			}*/
+
+			/*for (int32_t i=0; i<nData>>3; i++) // fastest on 64bit: 1.5s
+			{
+				uint64_t x=((uint64_t*) vec)[i];
+				int32_t ii=i<<3;
+				val[ii]=val[ii]*alphabet_size + (x&255);
+				val[ii+1]=val[ii+1]*alphabet_size + ((x>>8)&255);
+				val[ii+2]=val[ii+2]*alphabet_size + ((x>>16)&255);
+				val[ii+3]=val[ii+3]*alphabet_size + ((x>>24)&255);
+				val[ii+4]=val[ii+4]*alphabet_size + ((x>>32)&255);
+				val[ii+5]=val[ii+5]*alphabet_size + ((x>>40)&255);
+				val[ii+6]=val[ii+6]*alphabet_size + ((x>>48)&255);
+				val[ii+7]=val[ii+7]*alphabet_size + (x>>56);
+				out[ii]+=wd*w[offs+val[ii]];
+				out[ii+1]+=wd*w[offs+val[ii+1]];
+				out[ii+2]+=wd*w[offs+val[ii+2]];
+				out[ii+3]+=wd*w[offs+val[ii+3]];
+				out[ii+4]+=wd*w[offs+val[ii+4]];
+				out[ii+5]+=wd*w[offs+val[ii+5]];
+				out[ii+6]+=wd*w[offs+val[ii+6]];
+				out[ii+7]+=wd*w[offs+val[ii+7]];
+			}*/
+			offs+=w_offsets[k];
+			f->free_feature_vector(vec, j+k, free_vec);
+		}
+	}
+
+	for (int32_t i=start; i<end; i++)
+		output[i]=y[i]*o->bias + out[i]*y[i]/normalization_const;
+
+	//CMath::display_vector(o->w, o->w_dim, "w");
+	//CMath::display_vector(output, nData, "out");
+	return NULL;
+}
+
+int CWDSVMOcas::compute_output( float64_t *output, void* ptr )
+{
+#ifdef HAVE_PTHREAD
+	CWDSVMOcas* o = (CWDSVMOcas*) ptr;
+	int32_t nData=o->num_vec;
+	wdocas_thread_params_output* params_output=SG_MALLOC(wdocas_thread_params_output, o->parallel->get_num_threads());
+	pthread_t* threads = SG_MALLOC(pthread_t, o->parallel->get_num_threads());
+
+	float32_t* out=SG_MALLOC(float32_t, nData);
+	int32_t* val=SG_MALLOC(int32_t, nData);
+	memset(out, 0, sizeof(float32_t)*nData);
+
+	int32_t t;
+	int32_t nthreads=o->parallel->get_num_threads()-1;
+	int32_t step= nData/o->parallel->get_num_threads();
+
+	if (step<1)
+	{
+		nthreads=nData-1;
+		step=1;
+	}
+
+	for (t=0; t<nthreads; t++)
+	{
+		params_output[t].wdocas=o;
+		params_output[t].output=output;
+		params_output[t].out=out;
+		params_output[t].val=val;
+		params_output[t].start = step*t;
+		params_output[t].end = step*(t+1);
+
+		//SG_SPRINT("t=%d start=%d end=%d output=%p\n", t, params_output[t].start, params_output[t].end, params_output[t].output)
+		if (pthread_create(&threads[t], NULL, &CWDSVMOcas::compute_output_helper, (void*)&params_output[t]) != 0)
+		{
+			nthreads=t;
+			SG_SWARNING("thread creation failed\n")
+			break;
+		}
+	}
+
+	params_output[t].wdocas=o;
+	params_output[t].output=output;
+	params_output[t].out=out;
+	params_output[t].val=val;
+	params_output[t].start = step*t;
+	params_output[t].end = nData;
+	compute_output_helper(&params_output[t]);
+	//SG_SPRINT("t=%d start=%d end=%d output=%p\n", t, params_output[t].start, params_output[t].end, params_output[t].output)
+
+	for (t=0; t<nthreads; t++)
+	{
+		if (pthread_join(threads[t], NULL) != 0)
+			SG_SWARNING("pthread_join failed\n")
+	}
+	SG_FREE(threads);
+	SG_FREE(params_output);
+	SG_FREE(val);
+	SG_FREE(out);
+#endif /* HAVE_PTHREAD */
+	return 0;
+}
+/*----------------------------------------------------------------------
+  sq_norm_W = compute_W( alpha, nSel ) does the following:
+
+  oldW = W;
+  W = sparse_A(:,1:nSel)'*alpha;
+  sq_norm_W = W'*W;
+  dp_WoldW = W'*oldW';
+
+  ----------------------------------------------------------------------*/
+void CWDSVMOcas::compute_W(
+	float64_t *sq_norm_W, float64_t *dp_WoldW, float64_t *alpha, uint32_t nSel,
+	void* ptr)
+{
+	CWDSVMOcas* o = (CWDSVMOcas*) ptr;
+	uint32_t nDim= (uint32_t) o->w_dim;
+	CMath::swap(o->w, o->old_w);
+	SGVector<float32_t> W(o->w, nDim, false);
+	linalg::zero(W);
+	SGVector<float32_t> oldW(o->old_w, nDim, false);
+	float32_t** cuts=o->cuts;
+	float64_t* c_bias = o->cp_bias;
+	float64_t old_bias=o->bias;
+	float64_t bias=0;
+
+	for (uint32_t i=0; i<nSel; i++)
+	{
+		if (alpha[i] > 0)
+			SGVector<float32_t>::vec1_plus_scalar_times_vec2(W.vector, (float32_t) alpha[i], cuts[i], nDim);
+
+		bias += c_bias[i]*alpha[i];
+	}
+
+	*sq_norm_W = linalg::dot(W, W) +CMath::sq(bias);
+	*dp_WoldW = linalg::dot(W, oldW) + bias*old_bias;;
+	//SG_PRINT("nSel=%d sq_norm_W=%f dp_WoldW=%f\n", nSel, *sq_norm_W, *dp_WoldW)
+
+	o->bias = bias;
+	o->old_bias = old_bias;
+}
+
diff --git a/src/shogun/classifier/svm/WDSVMOcas.h b/src/shogun/classifier/svm/WDSVMOcas.h
new file mode 100644
index 00000000000..fd9164bee7a
--- /dev/null
+++ b/src/shogun/classifier/svm/WDSVMOcas.h
@@ -0,0 +1,371 @@
+#ifndef _WDSVMOCAS_H___
+#define _WDSVMOCAS_H___
+
+/*
+ * This software is distributed under BSD 3-clause license (see LICENSE file).
+ *
+ * Authors: Vojtech Franc, Soeren Sonnenburg
+ */
+
+
+#include <shogun/lib/config.h>
+
+#include <shogun/lib/common.h>
+#include <shogun/machine/Machine.h>
+#include <shogun/classifier/svm/SVMOcas.h>
+#include <shogun/features/StringFeatures.h>
+#include <shogun/labels/Labels.h>
+
+namespace shogun
+{
+template <class ST> class CStringFeatures;
+
+/** @brief class WDSVMOcas */
+class CWDSVMOcas : public CMachine
+{
+	public:
+		/** problem type */
+		MACHINE_PROBLEM_TYPE(PT_BINARY);
+
+		/** default constructor  */
+		CWDSVMOcas();
+
+		/** constructor
+		 *
+		 * @param type type of SVM
+		 */
+		CWDSVMOcas(E_SVM_TYPE type);
+
+		/** constructor
+		 *
+		 * @param C constant C
+		 * @param d degree
+		 * @param from_d from degree
+		 * @param traindat training features
+		 * @param trainlab labels for training features
+		 */
+		CWDSVMOcas(
+			float64_t C, int32_t d, int32_t from_d,
+			CStringFeatures<uint8_t>* traindat, CLabels* trainlab);
+		virtual ~CWDSVMOcas();
+
+		/** get classifier type
+		 *
+		 * @return classifier type WDSVMOCAS
+		 */
+		virtual EMachineType get_classifier_type() { return CT_WDSVMOCAS; }
+
+		/** set C
+		 *
+		 * @param c_neg new C constant for negatively labeled examples
+		 * @param c_pos new C constant for positively labeled examples
+		 *
+		 */
+		inline void set_C(float64_t c_neg, float64_t c_pos) { C1=c_neg; C2=c_pos; }
+
+		/** get C1
+		 *
+		 * @return C1
+		 */
+		inline float64_t get_C1() { return C1; }
+
+		/** get C2
+		 *
+		 * @return C2
+		 */
+		inline float64_t get_C2() { return C2; }
+
+		/** set epsilon
+		 *
+		 * @param eps new epsilon
+		 */
+		inline void set_epsilon(float64_t eps) { epsilon=eps; }
+
+		/** get epsilon
+		 *
+		 * @return epsilon
+		 */
+		inline float64_t get_epsilon() { return epsilon; }
+
+		/** set features
+		 *
+		 * @param feat features to set
+		 */
+		inline void set_features(CStringFeatures<uint8_t>* feat)
+		{
+			SG_REF(feat);
+			SG_UNREF(features);
+			features=feat;
+		}
+
+		/** get features
+		 *
+		 * @return features
+		 */
+		inline CStringFeatures<uint8_t>* get_features()
+		{
+			SG_REF(features);
+			return features;
+		}
+
+		/** set if bias shall be enabled
+		 *
+		 * @param enable_bias if bias shall be enabled
+		 */
+		inline void set_bias_enabled(bool enable_bias) { use_bias=enable_bias; }
+
+		/** check if bias is enabled
+		 *
+		 * @return if bias is enabled
+		 */
+		inline bool get_bias_enabled() { return use_bias; }
+
+		/** set buffer size
+		 *
+		 * @param sz buffer size
+		 */
+		inline void set_bufsize(int32_t sz) { bufsize=sz; }
+
+		/** get buffer size
+		 *
+		 * @return buffer size
+		 */
+		inline int32_t get_bufsize() { return bufsize; }
+
+		/** set degree
+		 *
+		 * @param d degree
+		 * @param from_d from degree
+		 */
+		inline void set_degree(int32_t d, int32_t from_d)
+		{
+			degree=d;
+			from_degree=from_d;
+		}
+
+		/** get degree
+		 *
+		 * @return degree
+		 */
+		inline int32_t get_degree() { return degree; }
+
+		/** classify objects
+		 * for binary classification problems
+		 *
+		 * @param data (test)data to be classified
+		 * @return classified labels
+		 */
+		virtual CBinaryLabels* apply_binary(CFeatures* data=NULL);
+
+		/** classify objects
+		 * for regression problems
+		 *
+		 * @param data (test)data to be classified
+		 * @return classified labels
+		 */
+		virtual CRegressionLabels* apply_regression(CFeatures* data=NULL);
+
+		/** classify one example
+		 *
+		 * @param num number of example to classify
+		 * @return classified result
+		 */
+		virtual float64_t apply_one(int32_t num)
+		{
+			ASSERT(features)
+			if (!wd_weights)
+				set_wd_weights();
+
+			int32_t len=0;
+			float64_t sum=0;
+			bool free_vec;
+			uint8_t* vec=features->get_feature_vector(num, len, free_vec);
+			//SG_INFO("len %d, string_length %d\n", len, string_length)
+			ASSERT(len==string_length)
+
+			for (int32_t j=0; j<string_length; j++)
+			{
+				int32_t offs=w_dim_single_char*j;
+				int32_t val=0;
+				for (int32_t k=0; (j+k<string_length) && (k<degree); k++)
+				{
+					val=val*alphabet_size + vec[j+k];
+					sum+=wd_weights[k] * w[offs+val];
+					offs+=w_offsets[k];
+				}
+			}
+			features->free_feature_vector(vec, num, free_vec);
+			return sum/normalization_const;
+		}
+
+		/** set normalization const */
+		inline void set_normalization_const()
+		{
+			ASSERT(features)
+			normalization_const=0;
+			for (int32_t i=0; i<degree; i++)
+				normalization_const+=(string_length-i)*wd_weights[i]*wd_weights[i];
+
+			normalization_const=CMath::sqrt(normalization_const);
+			SG_DEBUG("normalization_const:%f\n", normalization_const)
+		}
+
+		/** get normalization const
+		 *
+		 * @return normalization const
+		 */
+		inline float64_t get_normalization_const() { return normalization_const; }
+
+
+	protected:
+
+		/** get real outputs
+		 *
+		 * @param data features to apply for
+		 */
+		SGVector<float64_t> apply_get_outputs(CFeatures* data);
+
+		/** set wd weights
+		 *
+		 * @return w_dim_single_c
+		 */
+		int32_t set_wd_weights();
+
+		/** compute W
+		 *
+		 * @param sq_norm_W square normed W
+		 * @param dp_WoldW dp W old W
+		 * @param alpha alpha
+		 * @param nSel nSel
+		 * @param ptr ptr
+		 */
+		static void compute_W(
+			float64_t *sq_norm_W, float64_t *dp_WoldW, float64_t *alpha,
+			uint32_t nSel, void* ptr );
+
+		/** update W
+		 *
+		 * @param t t
+		 * @param ptr ptr
+		 * @return something floaty
+		 */
+		static float64_t update_W(float64_t t, void* ptr );
+
+		/** helper function for adding a new cut
+		 *
+		 * @param ptr
+		 * @return ptr
+		 */
+		static void* add_new_cut_helper(void* ptr);
+
+		/** add new cut
+		 *
+		 * @param new_col_H new col H
+		 * @param new_cut new cut
+		 * @param cut_length length of cut
+		 * @param nSel nSel
+		 * @param ptr ptr
+		 */
+		static int add_new_cut(
+			float64_t *new_col_H, uint32_t *new_cut, uint32_t cut_length,
+			uint32_t nSel, void* ptr );
+
+		/** helper function for computing the output
+		 *
+		 * @param ptr
+		 * @return ptr
+		 */
+		static void* compute_output_helper(void* ptr);
+
+		/** compute output
+		 *
+		 * @param output output
+		 * @param ptr ptr
+		 */
+		static int compute_output( float64_t *output, void* ptr );
+
+		/** sort
+		 *
+		 * @param vals vals
+		 * @param data data
+		 * @param size size
+		 */
+		static int sort( float64_t* vals, float64_t* data, uint32_t size);
+
+		/** print nothing */
+		static inline void print(ocas_return_value_T value)
+		{
+			  return;
+		}
+
+
+		/** @return object name */
+		virtual const char* get_name() const { return "WDSVMOcas"; }
+
+	protected:
+		/** train classifier
+		 *
+		 * @param data training data (parameter can be avoided if distance or
+		 * kernel-based classifiers are used and distance/kernels are
+		 * initialized with train data)
+		 *
+		 * @return whether training was successful
+		 */
+		virtual bool train_machine(CFeatures* data=NULL);
+
+	protected:
+		/** features */
+		CStringFeatures<uint8_t>* features;
+		/** if bias shall be used */
+		bool use_bias;
+		/** buffer size */
+		int32_t bufsize;
+		/** C1 */
+		float64_t C1;
+		/** C2 */
+		float64_t C2;
+		/** epsilon */
+		float64_t epsilon;
+		/** method */
+		E_SVM_TYPE method;
+
+		/** degree */
+		int32_t degree;
+		/** from degree */
+		int32_t from_degree;
+		/** wd weights */
+		float32_t* wd_weights;
+		/** num vectors */
+		int32_t num_vec;
+		/** length of string in vector */
+		int32_t string_length;
+		/** size of alphabet */
+		int32_t alphabet_size;
+
+		/** normalization const */
+		float64_t normalization_const;
+
+		/** bias */
+		float64_t bias;
+		/** old_bias */
+		float64_t old_bias;
+		/** w offsets */
+		int32_t* w_offsets;
+		/** w dim */
+		int32_t w_dim;
+		/** w dim of a single char */
+		int32_t w_dim_single_char;
+		/** w */
+		float32_t* w;
+		/** old w*/
+		float32_t* old_w;
+		/** labels */
+		float64_t* lab;
+
+		/** cuts */
+		float32_t** cuts;
+		/** bias dimensions */
+		float64_t* cp_bias;
+};
+}
+#endif
diff --git a/src/shogun/latent/LatentSVM.cpp b/src/shogun/latent/LatentSVM.cpp
new file mode 100644
index 00000000000..d8c92336788
--- /dev/null
+++ b/src/shogun/latent/LatentSVM.cpp
@@ -0,0 +1,74 @@
+/*
+ * This software is distributed under BSD 3-clause license (see LICENSE file).
+ *
+ * Authors: Viktor Gal
+ */
+
+#include <shogun/latent/LatentSVM.h>
+#include <typeinfo>
+
+#include <shogun/classifier/svm/SVMOcas.h>
+#include <shogun/latent/LatentModel.h>
+
+using namespace shogun;
+
+CLatentSVM::CLatentSVM()
+	: CLinearLatentMachine()
+{
+}
+
+CLatentSVM::CLatentSVM(CLatentModel* model, float64_t C)
+	: CLinearLatentMachine(model, C)
+{
+}
+
+CLatentSVM::~CLatentSVM()
+{
+}
+
+CLatentLabels* CLatentSVM::apply_latent()
+{
+	if (!m_model)
+		SG_ERROR("LatentModel is not set!\n")
+
+	if (m_model->get_num_vectors() < 1)
+		return NULL;
+
+	SGVector<float64_t> w = get_w();
+	index_t num_examples = m_model->get_num_vectors();
+	CLatentLabels* hs = new CLatentLabels(num_examples);
+	CBinaryLabels* ys = new CBinaryLabels(num_examples);
+	hs->set_labels(ys);
+	m_model->set_labels(hs);
+
+	for (index_t i = 0; i < num_examples; ++i)
+	{
+		/* find h for the example */
+		CData* h = m_model->infer_latent_variable(w, i);
+		hs->add_latent_label(h);
+	}
+
+	/* compute the y labels */
+	CDotFeatures* x = m_model->get_psi_feature_vectors();
+	x->dense_dot_range(ys->get_labels().vector, 0, num_examples, NULL, w.vector, w.vlen, 0.0);
+
+	return hs;
+}
+
+float64_t CLatentSVM::do_inner_loop(float64_t cooling_eps)
+{
+	CLabels* ys = m_model->get_labels()->get_labels();
+	CDotFeatures* feats = (m_model->get_caching() ?
+			m_model->get_cached_psi_features() :
+			m_model->get_psi_feature_vectors());
+	CSVMOcas svm(m_C, feats, ys);
+	svm.set_epsilon(cooling_eps);
+	svm.train();
+	SG_UNREF(ys);
+	SG_UNREF(feats);
+
+	/* copy the resulting w */
+	set_w(svm.get_w().clone());
+
+	return svm.compute_primal_objective();
+}
diff --git a/src/shogun/latent/LatentSVM.h b/src/shogun/latent/LatentSVM.h
new file mode 100644
index 00000000000..4783435dc09
--- /dev/null
+++ b/src/shogun/latent/LatentSVM.h
@@ -0,0 +1,74 @@
+#ifndef __LATENTSVM_H__
+#define __LATENTSVM_H__
+
+/*
+ * This software is distributed under BSD 3-clause license (see LICENSE file).
+ *
+ * Authors: Viktor Gal
+ */
+
+#include <shogun/lib/config.h>
+#include <shogun/lib/common.h>
+#include <shogun/machine/LinearLatentMachine.h>
+
+namespace shogun
+{
+	class LatentModel;
+
+	/** @brief LatentSVM class
+	 * Latent SVM implementation based on [1].
+	 * For optimization this implementation uses SVMOcas.
+	 *
+	 * User must provide a her own CLatentModel which implements the PSI(x_i,h_i)
+	 * function for the given problem.
+	 *
+	 * [1] P. F. Felzenszwalb, R. B. Girshick, D. McAllester, and D. Ramanan,
+	 *  "Object detection with discriminatively trained part-based models,"
+	 *  Pattern Analysis and Machine Intelligence,
+	 *  IEEE Transactions on, vol. 32, no. 9, pp. 1627-1645, 2010.
+	 *
+	 */
+	class CLatentSVM: public CLinearLatentMachine
+	{
+		public:
+			/** default contstructor */
+			CLatentSVM();
+
+			/** constructor
+			 *
+			 * @param model the user defined CLatentModel object.
+			 * @param C regularization constant
+			 */
+			CLatentSVM(CLatentModel* model, float64_t C);
+
+			virtual ~CLatentSVM();
+
+			/** apply linear machine to all examples
+			 *
+			 * @return resulting labels
+			 */
+			virtual CLatentLabels* apply_latent();
+
+			using CLinearLatentMachine::apply_latent;
+
+			/** Returns the name of the SGSerializable instance.
+			 *
+			 * @return name of the SGSerializable
+			 */
+			virtual const char* get_name() const { return "LatentSVM"; }
+
+		protected:
+			/** inner loop of the latent machine
+			 *
+			 * The optimization part after finding the argmax_h for the
+			 * positive examples in the outter loop. It uses SVMOcas for
+			 * finding the cutting plane.
+			 *
+			 * @param cooling_eps epsilon
+			 * @return primal objective value
+			 */
+			virtual float64_t do_inner_loop(float64_t cooling_eps);
+	};
+}
+
+#endif /* __LATENTSVM_H__ */
diff --git a/src/shogun/lib/external/libocas.cpp b/src/shogun/lib/external/libocas.cpp
new file mode 100644
index 00000000000..5683f9bea68
--- /dev/null
+++ b/src/shogun/lib/external/libocas.cpp
@@ -0,0 +1,2002 @@
+/*
+ * This software is distributed under BSD 3-clause license (see LICENSE file).
+ *
+ * Authors: Vojtech Franc, Soeren Sonnenburg
+ */
+
+#include <shogun/lib/config.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#ifndef _WIN32
+#include <sys/time.h>
+#endif
+#include <time.h>
+#include <stdio.h>
+#include <stdint.h>
+
+#include <shogun/base/progress.h>
+#include <shogun/lib/Time.h>
+#include <shogun/lib/external/libocas.h>
+#include <shogun/lib/external/libocas_common.h>
+#include <shogun/lib/external/libqp.h>
+
+namespace shogun
+{
+#define MU 0.1      /* must be from (0,1>   1..means that OCAS becomes equivalent to CPA */
+                    /* see paper Franc&Sonneburg JMLR 2009 */
+
+static const uint32_t QPSolverMaxIter = 10000000;
+
+static float64_t *H;
+static uint32_t BufSize;
+
+/*----------------------------------------------------------------------
+ Returns pointer at i-th column of Hessian matrix.
+  ----------------------------------------------------------------------*/
+static const float64_t *get_col( uint32_t i)
+{
+  return( &H[ BufSize*i ] );
+}
+
+/*----------------------------------------------------------------------
+  Returns time of the day in seconds.
+  ----------------------------------------------------------------------*/
+static float64_t get_time()
+{
+	struct timeval tv;
+	if (gettimeofday(&tv, NULL)==0)
+		return tv.tv_sec+((float64_t)(tv.tv_usec))/1e6;
+	else
+		return 0.0;
+}
+
+
+/*----------------------------------------------------------------------
+  Linear binary Ocas-SVM solver with additinal contraint enforceing
+  a subset of weights (indices of the weights given by num_nnw/nnw_idx)
+  to be non-negative.
+
+  ----------------------------------------------------------------------*/
+ocas_return_value_T svm_ocas_solver_nnw(
+            float64_t C,
+            uint32_t nData,
+            uint32_t num_nnw,
+            uint32_t* nnw_idx,
+            float64_t TolRel,
+            float64_t TolAbs,
+            float64_t QPBound,
+            float64_t MaxTime,
+            uint32_t _BufSize,
+            uint8_t Method,
+            int (*add_pw_constr)(uint32_t, uint32_t, void*),
+            void (*clip_neg_W)(uint32_t, uint32_t*, void*),
+            void (*compute_W)(float64_t*, float64_t*, float64_t*, uint32_t, void*),
+            float64_t (*update_W)(float64_t, void*),
+            int (*add_new_cut)(float64_t*, uint32_t*, uint32_t, uint32_t, void*),
+            int (*compute_output)(float64_t*, void* ),
+            int (*sort)(float64_t*, float64_t*, uint32_t),
+                        void (*ocas_print)(ocas_return_value_T),
+                        void* user_data)
+{
+  ocas_return_value_T ocas={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+  float64_t *b, *alpha, *diag_H;
+  float64_t *output, *old_output;
+  float64_t xi, sq_norm_W, QPSolverTolRel, dot_prod_WoldW, sq_norm_oldW;
+  float64_t A0, B0, GradVal, t, t1, t2, *Ci, *Bi, *hpf, *hpb;
+  float64_t start_time, ocas_start_time;
+  uint32_t cut_length;
+  uint32_t i, *new_cut;
+  uint32_t *I;
+/*  uint8_t S = 1; */
+  libqp_state_T qp_exitflag;
+
+  float64_t max_cp_norm;
+  float64_t max_b;
+  float64_t Cs[2];
+  uint8_t S[2];
+
+  ocas_start_time = get_time();
+  ocas.qp_solver_time = 0;
+  ocas.output_time = 0;
+  ocas.sort_time = 0;
+  ocas.add_time = 0;
+  ocas.w_time = 0;
+  ocas.print_time = 0;
+
+  BufSize = _BufSize;
+
+  QPSolverTolRel = TolRel*0.5;
+
+  H=NULL;
+  b=NULL;
+  alpha=NULL;
+  new_cut=NULL;
+  I=NULL;
+  diag_H=NULL;
+  output=NULL;
+  old_output=NULL;
+  hpf=NULL;
+  hpb = NULL;
+  Ci=NULL;
+  Bi=NULL;
+
+  /* Hessian matrix contains dot product of normal vectors of selected cutting planes */
+  H = (float64_t*)LIBOCAS_CALLOC(BufSize*BufSize, float64_t);
+  if(H == NULL)
+  {
+          ocas.exitflag=-2;
+          goto cleanup;
+  }
+
+  /* bias of cutting planes */
+  b = (float64_t*)LIBOCAS_CALLOC(BufSize, float64_t);
+  if(b == NULL)
+  {
+          ocas.exitflag=-2;
+          goto cleanup;
+  }
+
+  alpha = (float64_t*)LIBOCAS_CALLOC(BufSize, float64_t);
+  if(alpha == NULL)
+  {
+          ocas.exitflag=-2;
+          goto cleanup;
+  }
+
+  /* indices of examples which define a new cut */
+  new_cut = (uint32_t*)LIBOCAS_CALLOC(nData, uint32_t);
+  if(new_cut == NULL)
+  {
+          ocas.exitflag=-2;
+          goto cleanup;
+  }
+
+  I = (uint32_t*)LIBOCAS_CALLOC(BufSize, uint32_t);
+  if(I == NULL)
+  {
+          ocas.exitflag=-2;
+          goto cleanup;
+  }
+
+  for(i=0; i< BufSize; i++) I[i] = 2;
+
+  diag_H = (float64_t*)LIBOCAS_CALLOC(BufSize, float64_t);
+  if(diag_H == NULL)
+  {
+          ocas.exitflag=-2;
+          goto cleanup;
+  }
+
+  output = (float64_t*)LIBOCAS_CALLOC(nData, float64_t);
+  if(output == NULL)
+  {
+          ocas.exitflag=-2;
+          goto cleanup;
+  }
+
+  old_output = (float64_t*)LIBOCAS_CALLOC(nData, float64_t);
+  if(old_output == NULL)
+  {
+          ocas.exitflag=-2;
+          goto cleanup;
+  }
+
+  /* array of hinge points used in line-serach  */
+  hpf = (float64_t*) LIBOCAS_CALLOC(nData, float64_t);
+  if(hpf == NULL)
+  {
+          ocas.exitflag=-2;
+          goto cleanup;
+  }
+
+  hpb = (float64_t*) LIBOCAS_CALLOC(nData, float64_t);
+  if(hpb == NULL)
+  {
+          ocas.exitflag=-2;
+          goto cleanup;
+  }
+
+  /* vectors Ci, Bi are used in the line search procedure */
+  Ci = (float64_t*)LIBOCAS_CALLOC(nData, float64_t);
+  if(Ci == NULL)
+  {
+          ocas.exitflag=-2;
+          goto cleanup;
+  }
+
+  Bi = (float64_t*)LIBOCAS_CALLOC(nData, float64_t);
+  if(Bi == NULL)
+  {
+          ocas.exitflag=-2;
+          goto cleanup;
+  }
+
+  /* initial cutting planes implementing the non-negativity constraints on W*/
+  Cs[0]=10000000.0;
+  Cs[1]=C;
+  S[0]=1;
+  S[1]=1;
+  for(i=0; i < num_nnw; i++)
+  {
+      if(add_pw_constr(nnw_idx[i],i, user_data) != 0)
+      {
+          ocas.exitflag=-2;
+          goto cleanup;
+      }
+      diag_H[i] = 1.0;
+      H[LIBOCAS_INDEX(i,i,BufSize)] = 1.0;
+      I[i] = 1;
+  }
+
+  max_cp_norm = 1;
+  max_b = 0;
+
+  /*  */
+  ocas.nCutPlanes = num_nnw;
+  ocas.exitflag = 0;
+  ocas.nIter = 0;
+
+  /* Compute initial value of Q_P assuming that W is zero vector.*/
+  sq_norm_W = 0;
+  xi = nData;
+  ocas.Q_P = 0.5*sq_norm_W + C*xi;
+  ocas.Q_D = 0;
+
+  /* Compute the initial cutting plane */
+  cut_length = nData;
+  for(i=0; i < nData; i++)
+    new_cut[i] = i;
+
+  ocas.trn_err = nData;
+  ocas.ocas_time = get_time() - ocas_start_time;
+  /*  ocas_print("%4d: tim=%f, Q_P=%f, Q_D=%f, Q_P-Q_D=%f, Q_P-Q_D/abs(Q_P)=%f\n",
+          ocas.nIter,cur_time, ocas.Q_P,ocas.Q_D,ocas.Q_P-ocas.Q_D,(ocas.Q_P-ocas.Q_D)/LIBOCAS_ABS(ocas.Q_P));
+  */
+  ocas_print(ocas);
+
+  /* main loop */
+  while( ocas.exitflag == 0 )
+  {
+    ocas.nIter++;
+
+    /* append a new cut to the buffer and update H */
+    b[ocas.nCutPlanes] = -(float64_t)cut_length;
+
+    max_b = LIBOCAS_MAX(max_b,(float64_t)cut_length);
+
+    start_time = get_time();
+
+    if(add_new_cut( &H[LIBOCAS_INDEX(0,ocas.nCutPlanes,BufSize)], new_cut, cut_length, ocas.nCutPlanes, user_data ) != 0)
+    {
+          ocas.exitflag=-2;
+          goto cleanup;
+    }
+
+    ocas.add_time += get_time() - start_time;
+
+    /* copy new added row:  H(ocas.nCutPlanes,ocas.nCutPlanes,1:ocas.nCutPlanes-1) = H(1:ocas.nCutPlanes-1:ocas.nCutPlanes)' */
+    diag_H[ocas.nCutPlanes] = H[LIBOCAS_INDEX(ocas.nCutPlanes,ocas.nCutPlanes,BufSize)];
+    for(i=0; i < ocas.nCutPlanes; i++) {
+      H[LIBOCAS_INDEX(ocas.nCutPlanes,i,BufSize)] = H[LIBOCAS_INDEX(i,ocas.nCutPlanes,BufSize)];
+    }
+
+    max_cp_norm = LIBOCAS_MAX(max_cp_norm, sqrt(diag_H[ocas.nCutPlanes]));
+
+
+    ocas.nCutPlanes++;
+
+    /* call inner QP solver */
+    start_time = get_time();
+
+    /* compute upper bound on sum of dual variables associated with the positivity constraints */
+    Cs[0] = sqrt((float64_t)nData)*(sqrt(C)*sqrt(max_b) + C*max_cp_norm);
+
+/*    qp_exitflag = libqp_splx_solver(&get_col, diag_H, b, &C, I, &S, alpha,
+                                  ocas.nCutPlanes, QPSolverMaxIter, 0.0, QPSolverTolRel, -LIBOCAS_PLUS_INF,0);*/
+    qp_exitflag = libqp_splx_solver(&get_col, diag_H, b, Cs, I, S, alpha,
+                                  ocas.nCutPlanes, QPSolverMaxIter, 0.0, QPSolverTolRel, -LIBOCAS_PLUS_INF,0);
+
+    ocas.qp_exitflag = qp_exitflag.exitflag;
+
+    ocas.qp_solver_time += get_time() - start_time;
+    ocas.Q_D = -qp_exitflag.QP;
+
+    ocas.nNZAlpha = 0;
+    for(i=0; i < ocas.nCutPlanes; i++) {
+      if( alpha[i] != 0) ocas.nNZAlpha++;
+    }
+
+    sq_norm_oldW = sq_norm_W;
+    start_time = get_time();
+    compute_W( &sq_norm_W, &dot_prod_WoldW, alpha, ocas.nCutPlanes, user_data );
+    clip_neg_W(num_nnw, nnw_idx, user_data);
+    ocas.w_time += get_time() - start_time;
+
+    /* select a new cut */
+    switch( Method )
+    {
+      /* cutting plane algorithm implemented in SVMperf and BMRM */
+      case 0:
+
+        start_time = get_time();
+        if( compute_output( output, user_data ) != 0)
+        {
+          ocas.exitflag=-2;
+          goto cleanup;
+        }
+        ocas.output_time += get_time()-start_time;
+
+        xi = 0;
+        cut_length = 0;
+        ocas.trn_err = 0;
+        for(i=0; i < nData; i++)
+        {
+          if(output[i] <= 0) ocas.trn_err++;
+
+          if(output[i] <= 1) {
+            xi += 1 - output[i];
+            new_cut[cut_length] = i;
+            cut_length++;
+          }
+        }
+        ocas.Q_P = 0.5*sq_norm_W + C*xi;
+
+        ocas.ocas_time = get_time() - ocas_start_time;
+
+        /*        ocas_print("%4d: tim=%f, Q_P=%f, Q_D=%f, Q_P-Q_D=%f, 1-Q_D/Q_P=%f, nza=%4d, err=%.2f%%, qpf=%d\n",
+                  ocas.nIter,cur_time, ocas.Q_P,ocas.Q_D,ocas.Q_P-ocas.Q_D,(ocas.Q_P-ocas.Q_D)/LIBOCAS_ABS(ocas.Q_P),
+                  ocas.nNZAlpha, 100*(float64_t)ocas.trn_err/(float64_t)nData, ocas.qp_exitflag );
+        */
+
+        start_time = get_time();
+        ocas_print(ocas);
+        ocas.print_time += get_time() - start_time;
+
+        break;
+
+
+      /* Ocas strategy */
+      case 1:
+
+        /* Linesearch */
+        A0 = sq_norm_W -2*dot_prod_WoldW + sq_norm_oldW;
+        B0 = dot_prod_WoldW - sq_norm_oldW;
+
+        sg_memcpy( old_output, output, sizeof(float64_t)*nData );
+
+        start_time = get_time();
+        if( compute_output( output, user_data ) != 0)
+        {
+          ocas.exitflag=-2;
+          goto cleanup;
+        }
+        ocas.output_time += get_time()-start_time;
+
+        uint32_t num_hp = 0;
+        GradVal = B0;
+        for(i=0; i< nData; i++) {
+
+          Ci[i] = C*(1-old_output[i]);
+          Bi[i] = C*(old_output[i] - output[i]);
+
+          float64_t val;
+          if(Bi[i] != 0)
+            val = -Ci[i]/Bi[i];
+          else
+            val = -LIBOCAS_PLUS_INF;
+
+          if (val>0)
+          {
+/*            hpi[num_hp] = i;*/
+            hpb[num_hp] = Bi[i];
+            hpf[num_hp] = val;
+            num_hp++;
+          }
+
+          if( (Bi[i] < 0 && val > 0) || (Bi[i] > 0 && val <= 0))
+            GradVal += Bi[i];
+
+        }
+
+        t = 0;
+        if( GradVal < 0 )
+        {
+          start_time = get_time();
+/*          if( sort(hpf, hpi, num_hp) != 0)*/
+          if( sort(hpf, hpb, num_hp) != 0 )
+          {
+            ocas.exitflag=-2;
+            goto cleanup;
+          }
+          ocas.sort_time += get_time() - start_time;
+
+          float64_t t_new, GradVal_new;
+          i = 0;
+          while( GradVal < 0 && i < num_hp )
+          {
+            t_new = hpf[i];
+            GradVal_new = GradVal + LIBOCAS_ABS(hpb[i]) + A0*(t_new-t);
+
+            if( GradVal_new >= 0 )
+            {
+              t = t + GradVal*(t-t_new)/(GradVal_new - GradVal);
+            }
+            else
+            {
+              t = t_new;
+              i++;
+            }
+
+            GradVal = GradVal_new;
+          }
+        }
+
+        /*
+        t = hpf[0] - 1;
+        i = 0;
+        GradVal = t*A0 + Bsum;
+        while( GradVal < 0 && i < num_hp && hpf[i] < LIBOCAS_PLUS_INF ) {
+          t = hpf[i];
+          Bsum = Bsum + LIBOCAS_ABS(Bi[hpi[i]]);
+          GradVal = t*A0 + Bsum;
+          i++;
+        }
+        */
+        t = LIBOCAS_MAX(t,0);          /* just sanity check; t < 0 should not ocure */
+
+        /* this guarantees that the new solution will not violate the positivity constraints on W */
+        t = LIBOCAS_MIN(t,1);
+
+        t1 = t;                /* new (best so far) W */
+        t2 = t+MU*(1.0-t);   /* new cutting plane */
+        /*        t2 = t+(1.0-t)/10.0;   */
+
+        /* update W to be the best so far solution */
+        sq_norm_W = update_W( t1, user_data );
+
+        /* select a new cut */
+        xi = 0;
+        cut_length = 0;
+        ocas.trn_err = 0;
+        for(i=0; i < nData; i++ ) {
+
+          if( (old_output[i]*(1-t2) + t2*output[i]) <= 1 )
+          {
+            new_cut[cut_length] = i;
+            cut_length++;
+          }
+
+          output[i] = old_output[i]*(1-t1) + t1*output[i];
+
+          if( output[i] <= 1) xi += 1-output[i];
+          if( output[i] <= 0) ocas.trn_err++;
+
+        }
+
+        ocas.Q_P = 0.5*sq_norm_W + C*xi;
+
+        ocas.ocas_time = get_time() - ocas_start_time;
+
+        /*        ocas_print("%4d: tim=%f, Q_P=%f, Q_D=%f, Q_P-Q_D=%f, 1-Q_D/Q_P=%f, nza=%4d, err=%.2f%%, qpf=%d\n",
+                   ocas.nIter, cur_time, ocas.Q_P,ocas.Q_D,ocas.Q_P-ocas.Q_D,(ocas.Q_P-ocas.Q_D)/LIBOCAS_ABS(ocas.Q_P),
+                   ocas.nNZAlpha, 100*(float64_t)ocas.trn_err/(float64_t)nData, ocas.qp_exitflag );
+        */
+
+        start_time = get_time();
+        ocas_print(ocas);
+        ocas.print_time += get_time() - start_time;
+
+        break;
+    }
+
+    /* Stopping conditions */
+    if( ocas.Q_P - ocas.Q_D <= TolRel*LIBOCAS_ABS(ocas.Q_P)) ocas.exitflag = 1;
+    if( ocas.Q_P - ocas.Q_D <= TolAbs) ocas.exitflag = 2;
+    if( ocas.Q_P <= QPBound) ocas.exitflag = 3;
+    if( MaxTime > 0 && ocas.ocas_time >= MaxTime) ocas.exitflag = 4;
+    if(ocas.nCutPlanes >= BufSize) ocas.exitflag = -1;
+
+  } /* end of the main loop */
+
+cleanup:
+
+  LIBOCAS_FREE(H);
+  LIBOCAS_FREE(b);
+  LIBOCAS_FREE(alpha);
+  LIBOCAS_FREE(new_cut);
+  LIBOCAS_FREE(I);
+  LIBOCAS_FREE(diag_H);
+  LIBOCAS_FREE(output);
+  LIBOCAS_FREE(old_output);
+  LIBOCAS_FREE(hpf);
+/*  LIBOCAS_FREE(hpi);*/
+  LIBOCAS_FREE(hpb);
+  LIBOCAS_FREE(Ci);
+  LIBOCAS_FREE(Bi);
+
+  ocas.ocas_time = get_time() - ocas_start_time;
+
+  return(ocas);
+}
+
+
+
+/*----------------------------------------------------------------------
+  Linear binary Ocas-SVM solver.
+  ----------------------------------------------------------------------*/
+ocas_return_value_T svm_ocas_solver(
+            float64_t C,
+            uint32_t nData,
+            float64_t TolRel,
+            float64_t TolAbs,
+            float64_t QPBound,
+            float64_t MaxTime,
+            uint32_t _BufSize,
+            uint8_t Method,
+            void (*compute_W)(float64_t*, float64_t*, float64_t*, uint32_t, void*),
+            float64_t (*update_W)(float64_t, void*),
+            int (*add_new_cut)(float64_t*, uint32_t*, uint32_t, uint32_t, void*),
+            int (*compute_output)(float64_t*, void* ),
+            int (*sort)(float64_t*, float64_t*, uint32_t),
+			void (*ocas_print)(ocas_return_value_T),
+			void* user_data)
+{
+  ocas_return_value_T ocas={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+  float64_t *b, *alpha, *diag_H;
+  float64_t *output, *old_output;
+  float64_t xi, sq_norm_W, QPSolverTolRel, dot_prod_WoldW, sq_norm_oldW;
+  float64_t A0, B0, GradVal, t, t1, t2, *Ci, *Bi, *hpf, *hpb;
+  float64_t start_time, ocas_start_time;
+  uint32_t cut_length;
+  uint32_t i, *new_cut;
+  uint32_t *I;
+  uint8_t S = 1;
+  libqp_state_T qp_exitflag;
+
+  ocas_start_time = get_time();
+  ocas.qp_solver_time = 0;
+  ocas.output_time = 0;
+  ocas.sort_time = 0;
+  ocas.add_time = 0;
+  ocas.w_time = 0;
+  ocas.print_time = 0;
+  float64_t gap;
+
+  BufSize = _BufSize;
+
+  QPSolverTolRel = TolRel*0.5;
+
+  H=NULL;
+  b=NULL;
+  alpha=NULL;
+  new_cut=NULL;
+  I=NULL;
+  diag_H=NULL;
+  output=NULL;
+  old_output=NULL;
+  hpf=NULL;
+  hpb = NULL;
+  Ci=NULL;
+  Bi=NULL;
+
+  /* Initialize progress bar */
+  auto pb = progress(range(10));
+
+  /* Hessian matrix contains dot product of normal vectors of selected cutting planes */
+  H = (float64_t*)LIBOCAS_CALLOC(BufSize*BufSize, float64_t);
+  if(H == NULL)
+  {
+	  ocas.exitflag=-2;
+	  goto cleanup;
+  }
+
+  /* bias of cutting planes */
+  b = (float64_t*)LIBOCAS_CALLOC(BufSize, float64_t);
+  if(b == NULL)
+  {
+	  ocas.exitflag=-2;
+	  goto cleanup;
+  }
+
+  alpha = (float64_t*)LIBOCAS_CALLOC(BufSize, float64_t);
+  if(alpha == NULL)
+  {
+	  ocas.exitflag=-2;
+	  goto cleanup;
+  }
+
+  /* indices of examples which define a new cut */
+  new_cut = (uint32_t*)LIBOCAS_CALLOC(nData, uint32_t);
+  if(new_cut == NULL)
+  {
+	  ocas.exitflag=-2;
+	  goto cleanup;
+  }
+
+  I = (uint32_t*)LIBOCAS_CALLOC(BufSize, uint32_t);
+  if(I == NULL)
+  {
+	  ocas.exitflag=-2;
+	  goto cleanup;
+  }
+
+  for(i=0; i< BufSize; i++) I[i] = 1;
+
+  diag_H = (float64_t*)LIBOCAS_CALLOC(BufSize, float64_t);
+  if(diag_H == NULL)
+  {
+	  ocas.exitflag=-2;
+	  goto cleanup;
+  }
+
+  output = (float64_t*)LIBOCAS_CALLOC(nData, float64_t);
+  if(output == NULL)
+  {
+	  ocas.exitflag=-2;
+	  goto cleanup;
+  }
+
+  old_output = (float64_t*)LIBOCAS_CALLOC(nData, float64_t);
+  if(old_output == NULL)
+  {
+	  ocas.exitflag=-2;
+	  goto cleanup;
+  }
+
+  /* array of hinge points used in line-serach  */
+  hpf = (float64_t*) LIBOCAS_CALLOC(nData, float64_t);
+  if(hpf == NULL)
+  {
+	  ocas.exitflag=-2;
+	  goto cleanup;
+  }
+
+  hpb = (float64_t*) LIBOCAS_CALLOC(nData, float64_t);
+  if(hpb == NULL)
+  {
+	  ocas.exitflag=-2;
+	  goto cleanup;
+  }
+
+  /* vectors Ci, Bi are used in the line search procedure */
+  Ci = (float64_t*)LIBOCAS_CALLOC(nData, float64_t);
+  if(Ci == NULL)
+  {
+	  ocas.exitflag=-2;
+	  goto cleanup;
+  }
+
+  Bi = (float64_t*)LIBOCAS_CALLOC(nData, float64_t);
+  if(Bi == NULL)
+  {
+	  ocas.exitflag=-2;
+	  goto cleanup;
+  }
+
+  ocas.nCutPlanes = 0;
+  ocas.exitflag = 0;
+  ocas.nIter = 0;
+
+  /* Compute initial value of Q_P assuming that W is zero vector.*/
+  sq_norm_W = 0;
+  xi = nData;
+  ocas.Q_P = 0.5*sq_norm_W + C*xi;
+  ocas.Q_D = 0;
+
+  /* Compute the initial cutting plane */
+  cut_length = nData;
+  for(i=0; i < nData; i++)
+    new_cut[i] = i;
+
+	gap=(ocas.Q_P-ocas.Q_D)/CMath::abs(ocas.Q_P);
+	pb.print_absolute(
+		gap, -CMath::log10(gap), -CMath::log10(1), -CMath::log10(TolRel));
+
+	ocas.trn_err = nData;
+	ocas.ocas_time = get_time() - ocas_start_time;
+	/*  ocas_print("%4d: tim=%f, Q_P=%f, Q_D=%f, Q_P-Q_D=%f,
+	   Q_P-Q_D/abs(Q_P)=%f\n",
+		    ocas.nIter,cur_time,
+	   ocas.Q_P,ocas.Q_D,ocas.Q_P-ocas.Q_D,(ocas.Q_P-ocas.Q_D)/LIBOCAS_ABS(ocas.Q_P));
+	*/
+	ocas_print(ocas);
+
+	/* main loop */
+	while (ocas.exitflag == 0)
+	{
+		ocas.nIter++;
+
+		/* append a new cut to the buffer and update H */
+		b[ocas.nCutPlanes] = -(float64_t)cut_length;
+
+		start_time = get_time();
+
+		if (add_new_cut(
+			    &H[LIBOCAS_INDEX(0, ocas.nCutPlanes, BufSize)], new_cut,
+			    cut_length, ocas.nCutPlanes, user_data) != 0)
+		{
+			ocas.exitflag = -2;
+			goto cleanup;
+		}
+
+		ocas.add_time += get_time() - start_time;
+
+		/* copy new added row:
+		 * H(ocas.nCutPlanes,ocas.nCutPlanes,1:ocas.nCutPlanes-1) =
+		 * H(1:ocas.nCutPlanes-1:ocas.nCutPlanes)' */
+		diag_H[ocas.nCutPlanes] =
+			H[LIBOCAS_INDEX(ocas.nCutPlanes, ocas.nCutPlanes, BufSize)];
+		for (i = 0; i < ocas.nCutPlanes; i++)
+		{
+			H[LIBOCAS_INDEX(ocas.nCutPlanes, i, BufSize)] =
+				H[LIBOCAS_INDEX(i, ocas.nCutPlanes, BufSize)];
+		}
+
+		ocas.nCutPlanes++;
+
+		/* call inner QP solver */
+		start_time = get_time();
+
+		qp_exitflag = libqp_splx_solver(
+			&get_col, diag_H, b, &C, I, &S, alpha, ocas.nCutPlanes,
+			QPSolverMaxIter, 0.0, QPSolverTolRel, -LIBOCAS_PLUS_INF, 0);
+
+		ocas.qp_exitflag = qp_exitflag.exitflag;
+
+		ocas.qp_solver_time += get_time() - start_time;
+		ocas.Q_D = -qp_exitflag.QP;
+
+		ocas.nNZAlpha = 0;
+		for (i = 0; i < ocas.nCutPlanes; i++)
+		{
+			if (alpha[i] != 0)
+				ocas.nNZAlpha++;
+		}
+
+		sq_norm_oldW = sq_norm_W;
+		start_time = get_time();
+		compute_W(
+			&sq_norm_W, &dot_prod_WoldW, alpha, ocas.nCutPlanes, user_data);
+		ocas.w_time += get_time() - start_time;
+
+		/* select a new cut */
+		switch (Method)
+		{
+		/* cutting plane algorithm implemented in SVMperf and BMRM */
+		case 0:
+
+			start_time = get_time();
+			if (compute_output(output, user_data) != 0)
+			{
+				ocas.exitflag = -2;
+				goto cleanup;
+			}
+			ocas.output_time += get_time() - start_time;
+			gap = (ocas.Q_P - ocas.Q_D) / CMath::abs(ocas.Q_P);
+			pb.print_absolute(
+				gap, -CMath::log10(gap), -CMath::log10(1),
+				-CMath::log10(TolRel));
+
+			xi = 0;
+			cut_length = 0;
+			ocas.trn_err = 0;
+			for (i = 0; i < nData; i++)
+			{
+				if (output[i] <= 0)
+					ocas.trn_err++;
+
+				if (output[i] <= 1)
+				{
+					xi += 1 - output[i];
+					new_cut[cut_length] = i;
+					cut_length++;
+				}
+			}
+			ocas.Q_P = 0.5 * sq_norm_W + C * xi;
+
+			ocas.ocas_time = get_time() - ocas_start_time;
+
+			/*        ocas_print("%4d: tim=%f, Q_P=%f, Q_D=%f, Q_P-Q_D=%f,
+			   1-Q_D/Q_P=%f, nza=%4d, err=%.2f%%, qpf=%d\n",
+				      ocas.nIter,cur_time,
+			   ocas.Q_P,ocas.Q_D,ocas.Q_P-ocas.Q_D,(ocas.Q_P-ocas.Q_D)/LIBOCAS_ABS(ocas.Q_P),
+				      ocas.nNZAlpha,
+			   100*(float64_t)ocas.trn_err/(float64_t)nData, ocas.qp_exitflag );
+			*/
+
+			start_time = get_time();
+			ocas_print(ocas);
+			ocas.print_time += get_time() - start_time;
+
+			break;
+
+		/* Ocas strategy */
+		case 1:
+
+			/* Linesearch */
+			A0 = sq_norm_W - 2 * dot_prod_WoldW + sq_norm_oldW;
+			B0 = dot_prod_WoldW - sq_norm_oldW;
+
+			sg_memcpy(old_output, output, sizeof(float64_t) * nData);
+
+			start_time = get_time();
+			if (compute_output(output, user_data) != 0)
+			{
+				ocas.exitflag = -2;
+				goto cleanup;
+			}
+			ocas.output_time += get_time() - start_time;
+
+			uint32_t num_hp = 0;
+			GradVal = B0;
+			for (i = 0; i < nData; i++)
+			{
+
+				Ci[i] = C * (1 - old_output[i]);
+				Bi[i] = C * (old_output[i] - output[i]);
+
+				float64_t val;
+				if (Bi[i] != 0)
+					val = -Ci[i] / Bi[i];
+				else
+					val = -LIBOCAS_PLUS_INF;
+
+				if (val > 0)
+				{
+					/*            hpi[num_hp] = i;*/
+					hpb[num_hp] = Bi[i];
+					hpf[num_hp] = val;
+					num_hp++;
+				}
+
+				if ((Bi[i] < 0 && val > 0) || (Bi[i] > 0 && val <= 0))
+					GradVal += Bi[i];
+			}
+
+			t = 0;
+			if (GradVal < 0)
+			{
+				start_time = get_time();
+				/*          if( sort(hpf, hpi, num_hp) != 0)*/
+				if (sort(hpf, hpb, num_hp) != 0)
+				{
+					ocas.exitflag = -2;
+					goto cleanup;
+				}
+				ocas.sort_time += get_time() - start_time;
+
+				float64_t t_new, GradVal_new;
+				i = 0;
+				while (GradVal < 0 && i < num_hp)
+				{
+					t_new = hpf[i];
+					GradVal_new =
+						GradVal + LIBOCAS_ABS(hpb[i]) + A0 * (t_new - t);
+
+					if (GradVal_new >= 0)
+					{
+						t = t + GradVal * (t - t_new) / (GradVal_new - GradVal);
+					}
+					else
+					{
+						t = t_new;
+						i++;
+					}
+
+					GradVal = GradVal_new;
+				}
+			}
+
+			/*
+			t = hpf[0] - 1;
+			i = 0;
+			GradVal = t*A0 + Bsum;
+			while( GradVal < 0 && i < num_hp && hpf[i] < LIBOCAS_PLUS_INF ) {
+			  t = hpf[i];
+			  Bsum = Bsum + LIBOCAS_ABS(Bi[hpi[i]]);
+			  GradVal = t*A0 + Bsum;
+			  i++;
+			}
+			*/
+			t = LIBOCAS_MAX(
+				t, 0); /* just sanity check; t < 0 should not ocure */
+
+			t1 = t;                  /* new (best so far) W */
+			t2 = t + MU * (1.0 - t); /* new cutting plane */
+			/*        t2 = t+(1.0-t)/10.0;   */
+
+			/* update W to be the best so far solution */
+			sq_norm_W = update_W(t1, user_data);
+
+			/* select a new cut */
+			xi = 0;
+			cut_length = 0;
+			ocas.trn_err = 0;
+			for (i = 0; i < nData; i++)
+			{
+
+				if ((old_output[i] * (1 - t2) + t2 * output[i]) <= 1)
+				{
+					new_cut[cut_length] = i;
+					cut_length++;
+				}
+
+				output[i] = old_output[i] * (1 - t1) + t1 * output[i];
+
+				if (output[i] <= 1)
+					xi += 1 - output[i];
+				if (output[i] <= 0)
+					ocas.trn_err++;
+			}
+
+			ocas.Q_P = 0.5 * sq_norm_W + C * xi;
+
+			ocas.ocas_time = get_time() - ocas_start_time;
+
+			/*        ocas_print("%4d: tim=%f, Q_P=%f, Q_D=%f, Q_P-Q_D=%f,
+			   1-Q_D/Q_P=%f, nza=%4d, err=%.2f%%, qpf=%d\n",
+				       ocas.nIter, cur_time,
+			   ocas.Q_P,ocas.Q_D,ocas.Q_P-ocas.Q_D,(ocas.Q_P-ocas.Q_D)/LIBOCAS_ABS(ocas.Q_P),
+				       ocas.nNZAlpha,
+			   100*(float64_t)ocas.trn_err/(float64_t)nData, ocas.qp_exitflag );
+			*/
+
+			start_time = get_time();
+			ocas_print(ocas);
+			ocas.print_time += get_time() - start_time;
+
+			break;
+		}
+
+		/* Stopping conditions */
+		if (ocas.Q_P - ocas.Q_D <= TolRel * LIBOCAS_ABS(ocas.Q_P))
+			ocas.exitflag = 1;
+		if (ocas.Q_P - ocas.Q_D <= TolAbs)
+			ocas.exitflag = 2;
+		if (ocas.Q_P <= QPBound)
+			ocas.exitflag = 3;
+		if (MaxTime > 0 && ocas.ocas_time >= MaxTime)
+			ocas.exitflag = 4;
+		if (ocas.nCutPlanes >= BufSize)
+			ocas.exitflag = -1;
+
+  } /* end of the main loop */
+
+  pb.complete_absolute();
+cleanup:
+
+  LIBOCAS_FREE(H);
+  LIBOCAS_FREE(b);
+  LIBOCAS_FREE(alpha);
+  LIBOCAS_FREE(new_cut);
+  LIBOCAS_FREE(I);
+  LIBOCAS_FREE(diag_H);
+  LIBOCAS_FREE(output);
+  LIBOCAS_FREE(old_output);
+  LIBOCAS_FREE(hpf);
+/*  LIBOCAS_FREE(hpi);*/
+  LIBOCAS_FREE(hpb);
+  LIBOCAS_FREE(Ci);
+  LIBOCAS_FREE(Bi);
+
+  ocas.ocas_time = get_time() - ocas_start_time;
+
+  return(ocas);
+}
+
+
+/*----------------------------------------------------------------------
+  Binary linear Ocas-SVM solver which allows using different C for each
+  training example.
+  ----------------------------------------------------------------------*/
+ocas_return_value_T svm_ocas_solver_difC(
+            float64_t *C,
+            uint32_t nData,
+            float64_t TolRel,
+            float64_t TolAbs,
+            float64_t QPBound,
+            float64_t MaxTime,
+            uint32_t _BufSize,
+            uint8_t Method,
+            void (*compute_W)(float64_t*, float64_t*, float64_t*, uint32_t, void*),
+            float64_t (*update_W)(float64_t, void*),
+            int (*add_new_cut)(float64_t*, uint32_t*, uint32_t, uint32_t, void*),
+            int (*compute_output)(float64_t*, void* ),
+            int (*sort)(float64_t*, float64_t*, uint32_t),
+			void (*ocas_print)(ocas_return_value_T),
+			void* user_data)
+{
+  ocas_return_value_T ocas={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+  float64_t *b, *alpha, *diag_H;
+  float64_t *output, *old_output;
+  float64_t xi, sq_norm_W, QPSolverTolRel, dot_prod_WoldW, sq_norm_oldW;
+  float64_t A0, B0, GradVal, t, t1, t2, *Ci, *Bi, *hpf, *hpb;
+  float64_t start_time, ocas_start_time;
+  float64_t qp_b = 1.0;
+  float64_t new_b;
+  uint32_t cut_length;
+  uint32_t i, *new_cut;
+  uint32_t *I;
+  uint8_t S = 1;
+  libqp_state_T qp_exitflag;
+
+  ocas_start_time = get_time();
+  ocas.qp_solver_time = 0;
+  ocas.output_time = 0;
+  ocas.sort_time = 0;
+  ocas.add_time = 0;
+  ocas.w_time = 0;
+  ocas.print_time = 0;
+
+  BufSize = _BufSize;
+
+  QPSolverTolRel = TolRel*0.5;
+
+  H=NULL;
+  b=NULL;
+  alpha=NULL;
+  new_cut=NULL;
+  I=NULL;
+  diag_H=NULL;
+  output=NULL;
+  old_output=NULL;
+  hpf=NULL;
+  hpb = NULL;
+  Ci=NULL;
+  Bi=NULL;
+
+  /* Hessian matrix contains dot product of normal vectors of selected cutting planes */
+  H = (float64_t*)LIBOCAS_CALLOC(BufSize*BufSize, float64_t);
+  if(H == NULL)
+  {
+	  ocas.exitflag=-2;
+	  goto cleanup;
+  }
+
+  /* bias of cutting planes */
+  b = (float64_t*)LIBOCAS_CALLOC(BufSize, float64_t);
+  if(b == NULL)
+  {
+	  ocas.exitflag=-2;
+	  goto cleanup;
+  }
+
+  alpha = (float64_t*)LIBOCAS_CALLOC(BufSize, float64_t);
+  if(alpha == NULL)
+  {
+	  ocas.exitflag=-2;
+	  goto cleanup;
+  }
+
+  /* indices of examples which define a new cut */
+  new_cut = (uint32_t*)LIBOCAS_CALLOC(nData, uint32_t);
+  if(new_cut == NULL)
+  {
+	  ocas.exitflag=-2;
+	  goto cleanup;
+  }
+
+  I = (uint32_t*)LIBOCAS_CALLOC(BufSize, uint32_t);
+  if(I == NULL)
+  {
+	  ocas.exitflag=-2;
+	  goto cleanup;
+  }
+
+  for(i=0; i< BufSize; i++) I[i] = 1;
+
+  diag_H = (float64_t*)LIBOCAS_CALLOC(BufSize, float64_t);
+  if(diag_H == NULL)
+  {
+	  ocas.exitflag=-2;
+	  goto cleanup;
+  }
+
+  output = (float64_t*)LIBOCAS_CALLOC(nData, float64_t);
+  if(output == NULL)
+  {
+	  ocas.exitflag=-2;
+	  goto cleanup;
+  }
+
+  old_output = (float64_t*)LIBOCAS_CALLOC(nData, float64_t);
+  if(old_output == NULL)
+  {
+	  ocas.exitflag=-2;
+	  goto cleanup;
+  }
+
+  /* array of hinge points used in line-serach  */
+  hpf = (float64_t*) LIBOCAS_CALLOC(nData, float64_t);
+  if(hpf == NULL)
+  {
+	  ocas.exitflag=-2;
+	  goto cleanup;
+  }
+
+  hpb = (float64_t*) LIBOCAS_CALLOC(nData, float64_t);
+  if(hpb == NULL)
+  {
+	  ocas.exitflag=-2;
+	  goto cleanup;
+  }
+
+  /* vectors Ci, Bi are used in the line search procedure */
+  Ci = (float64_t*)LIBOCAS_CALLOC(nData, float64_t);
+  if(Ci == NULL)
+  {
+	  ocas.exitflag=-2;
+	  goto cleanup;
+  }
+
+  Bi = (float64_t*)LIBOCAS_CALLOC(nData, float64_t);
+  if(Bi == NULL)
+  {
+	  ocas.exitflag=-2;
+	  goto cleanup;
+  }
+
+  ocas.nCutPlanes = 0;
+  ocas.exitflag = 0;
+  ocas.nIter = 0;
+
+  /* Compute initial value of Q_P assuming that W is zero vector.*/
+  sq_norm_W = 0;
+/*
+  xi = nData;
+  ocas.Q_P = 0.5*sq_norm_W + C*xi;
+*/
+  ocas.Q_D = 0;
+
+  /* Compute the initial cutting plane */
+  cut_length = nData;
+  new_b = 0;
+  for(i=0; i < nData; i++)
+  {
+    new_cut[i] = i;
+    new_b += C[i];
+  }
+
+  ocas.Q_P = 0.5*sq_norm_W + new_b;
+
+
+  ocas.trn_err = nData;
+  ocas.ocas_time = get_time() - ocas_start_time;
+  /*  ocas_print("%4d: tim=%f, Q_P=%f, Q_D=%f, Q_P-Q_D=%f, Q_P-Q_D/abs(Q_P)=%f\n",
+          ocas.nIter,cur_time, ocas.Q_P,ocas.Q_D,ocas.Q_P-ocas.Q_D,(ocas.Q_P-ocas.Q_D)/LIBOCAS_ABS(ocas.Q_P));
+  */
+  ocas_print(ocas);
+
+  /* main loop */
+  while( ocas.exitflag == 0 )
+  {
+    ocas.nIter++;
+
+    /* append a new cut to the buffer and update H */
+/*    b[ocas.nCutPlanes] = -(float64_t)cut_length*C;*/
+    b[ocas.nCutPlanes] = -new_b;
+
+    start_time = get_time();
+
+    if(add_new_cut( &H[LIBOCAS_INDEX(0,ocas.nCutPlanes,BufSize)], new_cut, cut_length, ocas.nCutPlanes, user_data ) != 0)
+    {
+	  ocas.exitflag=-2;
+	  goto cleanup;
+    }
+
+    ocas.add_time += get_time() - start_time;
+
+    /* copy new added row:  H(ocas.nCutPlanes,ocas.nCutPlanes,1:ocas.nCutPlanes-1) = H(1:ocas.nCutPlanes-1:ocas.nCutPlanes)' */
+    diag_H[ocas.nCutPlanes] = H[LIBOCAS_INDEX(ocas.nCutPlanes,ocas.nCutPlanes,BufSize)];
+    for(i=0; i < ocas.nCutPlanes; i++) {
+      H[LIBOCAS_INDEX(ocas.nCutPlanes,i,BufSize)] = H[LIBOCAS_INDEX(i,ocas.nCutPlanes,BufSize)];
+    }
+
+    ocas.nCutPlanes++;
+
+    /* call inner QP solver */
+    start_time = get_time();
+
+/*    qp_exitflag = libqp_splx_solver(&get_col, diag_H, b, &C, I, &S, alpha,*/
+/*                                  ocas.nCutPlanes, QPSolverMaxIter, 0.0, QPSolverTolRel, -LIBOCAS_PLUS_INF,0);*/
+    qp_exitflag = libqp_splx_solver(&get_col, diag_H, b, &qp_b, I, &S, alpha,
+                                  ocas.nCutPlanes, QPSolverMaxIter, 0.0, QPSolverTolRel, -LIBOCAS_PLUS_INF,0);
+
+    ocas.qp_exitflag = qp_exitflag.exitflag;
+
+    ocas.qp_solver_time += get_time() - start_time;
+    ocas.Q_D = -qp_exitflag.QP;
+
+    ocas.nNZAlpha = 0;
+    for(i=0; i < ocas.nCutPlanes; i++) {
+      if( alpha[i] != 0) ocas.nNZAlpha++;
+    }
+
+    sq_norm_oldW = sq_norm_W;
+    start_time = get_time();
+    compute_W( &sq_norm_W, &dot_prod_WoldW, alpha, ocas.nCutPlanes, user_data );
+    ocas.w_time += get_time() - start_time;
+
+    /* select a new cut */
+    switch( Method )
+    {
+      /* cutting plane algorithm implemented in SVMperf and BMRM */
+      case 0:
+
+        start_time = get_time();
+        if( compute_output( output, user_data ) != 0)
+        {
+          ocas.exitflag=-2;
+          goto cleanup;
+        }
+        ocas.output_time += get_time()-start_time;
+
+        xi = 0;
+        cut_length = 0;
+        ocas.trn_err = 0;
+        new_b = 0;
+        for(i=0; i < nData; i++)
+        {
+          if(output[i] <= 0) ocas.trn_err++;
+
+/*          if(output[i] <= 1) {*/
+/*            xi += 1 - output[i];*/
+          if(output[i] <= C[i]) {
+            xi += C[i] - output[i];
+            new_cut[cut_length] = i;
+            cut_length++;
+            new_b += C[i];
+          }
+        }
+/*        ocas.Q_P = 0.5*sq_norm_W + C*xi;*/
+        ocas.Q_P = 0.5*sq_norm_W + xi;
+
+        ocas.ocas_time = get_time() - ocas_start_time;
+
+        /*        ocas_print("%4d: tim=%f, Q_P=%f, Q_D=%f, Q_P-Q_D=%f, 1-Q_D/Q_P=%f, nza=%4d, err=%.2f%%, qpf=%d\n",
+                  ocas.nIter,cur_time, ocas.Q_P,ocas.Q_D,ocas.Q_P-ocas.Q_D,(ocas.Q_P-ocas.Q_D)/LIBOCAS_ABS(ocas.Q_P),
+                  ocas.nNZAlpha, 100*(float64_t)ocas.trn_err/(float64_t)nData, ocas.qp_exitflag );
+        */
+
+        start_time = get_time();
+        ocas_print(ocas);
+        ocas.print_time += get_time() - start_time;
+
+        break;
+
+
+      /* Ocas strategy */
+      case 1:
+
+        /* Linesearch */
+        A0 = sq_norm_W -2*dot_prod_WoldW + sq_norm_oldW;
+        B0 = dot_prod_WoldW - sq_norm_oldW;
+
+        sg_memcpy( old_output, output, sizeof(float64_t)*nData );
+
+        start_time = get_time();
+        if( compute_output( output, user_data ) != 0)
+        {
+          ocas.exitflag=-2;
+          goto cleanup;
+        }
+        ocas.output_time += get_time()-start_time;
+
+        uint32_t num_hp = 0;
+        GradVal = B0;
+        for(i=0; i< nData; i++) {
+
+/*          Ci[i] = C*(1-old_output[i]);*/
+/*          Bi[i] = C*(old_output[i] - output[i]);*/
+          Ci[i] = (C[i]-old_output[i]);
+          Bi[i] = old_output[i] - output[i];
+
+          float64_t val;
+          if(Bi[i] != 0)
+            val = -Ci[i]/Bi[i];
+          else
+            val = -LIBOCAS_PLUS_INF;
+
+          if (val>0)
+          {
+/*            hpi[num_hp] = i;*/
+            hpb[num_hp] = Bi[i];
+            hpf[num_hp] = val;
+            num_hp++;
+          }
+
+          if( (Bi[i] < 0 && val > 0) || (Bi[i] > 0 && val <= 0))
+            GradVal += Bi[i];
+
+        }
+
+        t = 0;
+        if( GradVal < 0 )
+        {
+          start_time = get_time();
+/*          if( sort(hpf, hpi, num_hp) != 0)*/
+          if( sort(hpf, hpb, num_hp) != 0 )
+          {
+            ocas.exitflag=-2;
+            goto cleanup;
+          }
+          ocas.sort_time += get_time() - start_time;
+
+          float64_t t_new, GradVal_new;
+          i = 0;
+          while( GradVal < 0 && i < num_hp )
+          {
+            t_new = hpf[i];
+            GradVal_new = GradVal + LIBOCAS_ABS(hpb[i]) + A0*(t_new-t);
+
+            if( GradVal_new >= 0 )
+            {
+              t = t + GradVal*(t-t_new)/(GradVal_new - GradVal);
+            }
+            else
+            {
+              t = t_new;
+              i++;
+            }
+
+            GradVal = GradVal_new;
+          }
+        }
+
+        /*
+        t = hpf[0] - 1;
+        i = 0;
+        GradVal = t*A0 + Bsum;
+        while( GradVal < 0 && i < num_hp && hpf[i] < LIBOCAS_PLUS_INF ) {
+          t = hpf[i];
+          Bsum = Bsum + LIBOCAS_ABS(Bi[hpi[i]]);
+          GradVal = t*A0 + Bsum;
+          i++;
+        }
+        */
+        t = LIBOCAS_MAX(t,0);          /* just sanity check; t < 0 should not ocure */
+
+        t1 = t;                /* new (best so far) W */
+        t2 = t+(1.0-t)*MU;   /* new cutting plane */
+        /*        t2 = t+(1.0-t)/10.0;   new cutting plane */
+
+        /* update W to be the best so far solution */
+        sq_norm_W = update_W( t1, user_data );
+
+        /* select a new cut */
+        xi = 0;
+        cut_length = 0;
+        ocas.trn_err = 0;
+        new_b = 0;
+        for(i=0; i < nData; i++ ) {
+
+/*          if( (old_output[i]*(1-t2) + t2*output[i]) <= 1 ) */
+          if( (old_output[i]*(1-t2) + t2*output[i]) <= C[i] )
+          {
+            new_cut[cut_length] = i;
+            cut_length++;
+            new_b += C[i];
+          }
+
+          output[i] = old_output[i]*(1-t1) + t1*output[i];
+
+/*          if( output[i] <= 1) xi += 1-output[i];*/
+          if( output[i] <= C[i]) xi += C[i]-output[i];
+          if( output[i] <= 0) ocas.trn_err++;
+
+        }
+
+/*        ocas.Q_P = 0.5*sq_norm_W + C*xi;*/
+        ocas.Q_P = 0.5*sq_norm_W + xi;
+
+        ocas.ocas_time = get_time() - ocas_start_time;
+
+        /*        ocas_print("%4d: tim=%f, Q_P=%f, Q_D=%f, Q_P-Q_D=%f, 1-Q_D/Q_P=%f, nza=%4d, err=%.2f%%, qpf=%d\n",
+                   ocas.nIter, cur_time, ocas.Q_P,ocas.Q_D,ocas.Q_P-ocas.Q_D,(ocas.Q_P-ocas.Q_D)/LIBOCAS_ABS(ocas.Q_P),
+                   ocas.nNZAlpha, 100*(float64_t)ocas.trn_err/(float64_t)nData, ocas.qp_exitflag );
+        */
+
+        start_time = get_time();
+        ocas_print(ocas);
+        ocas.print_time += get_time() - start_time;
+
+        break;
+    }
+
+    /* Stopping conditions */
+    if( ocas.Q_P - ocas.Q_D <= TolRel*LIBOCAS_ABS(ocas.Q_P)) ocas.exitflag = 1;
+    if( ocas.Q_P - ocas.Q_D <= TolAbs) ocas.exitflag = 2;
+    if( ocas.Q_P <= QPBound) ocas.exitflag = 3;
+    if( MaxTime > 0 && ocas.ocas_time >= MaxTime) ocas.exitflag = 4;
+    if(ocas.nCutPlanes >= BufSize) ocas.exitflag = -1;
+
+  } /* end of the main loop */
+
+cleanup:
+	LIBOCAS_FREE(H);
+	LIBOCAS_FREE(b);
+	LIBOCAS_FREE(alpha);
+	LIBOCAS_FREE(new_cut);
+	LIBOCAS_FREE(I);
+	LIBOCAS_FREE(diag_H);
+	LIBOCAS_FREE(output);
+	LIBOCAS_FREE(old_output);
+	LIBOCAS_FREE(hpf);
+	/*  LIBOCAS_FREE(hpi);*/
+	LIBOCAS_FREE(hpb);
+	LIBOCAS_FREE(Ci);
+	LIBOCAS_FREE(Bi);
+
+	ocas.ocas_time = get_time() - ocas_start_time;
+
+	return (ocas);
+}
+
+
+
+/*----------------------------------------------------------------------
+  Multiclass SVM-Ocas solver
+  ----------------------------------------------------------------------*/
+
+/* Helper function needed by the multi-class SVM linesearch.
+
+  - This function finds a simplified representation of a piece-wise linear function
+  by splitting the domain into intervals and fining active terms for these intevals */
+static void findactive(float64_t *Theta, float64_t *SortedA, uint32_t *nSortedA, float64_t *A, float64_t *B, int n,
+            int (*sort)(float64_t*, float64_t*, uint32_t))
+{
+  float64_t tmp, theta;
+  uint32_t i, j, idx, idx2 = 0, start;
+
+  sort(A,B,n);
+
+  idx = 0;
+  i = 0;
+  while( i < (uint32_t)n-1 && A[i] == A[i+1])
+  {
+    if( B[i+1] > B[idx] )
+    {
+      idx = i+1;
+    }
+    i++;
+  }
+
+  (*nSortedA) = 1;
+  SortedA[0] = A[idx];
+
+  while(1)
+  {
+    start = idx + 1;
+    while( start < (uint32_t)n && A[idx] == A[start])
+      start++;
+
+    theta = LIBOCAS_PLUS_INF;
+    for(j=start; j < (uint32_t)n; j++)
+    {
+      tmp = (B[j] - B[idx])/(A[idx]-A[j]);
+      if( tmp < theta)
+      {
+        theta = tmp;
+        idx2 = j;
+      }
+    }
+
+    if( theta < LIBOCAS_PLUS_INF)
+    {
+      Theta[(*nSortedA) - 1] = theta;
+      SortedA[(*nSortedA)] = A[idx2];
+      (*nSortedA)++;
+      idx = idx2;
+    }
+    else
+      return;
+  }
+}
+
+
+/*----------------------------------------------------------------------
+  Multiclass linear OCAS-SVM solver.
+  ----------------------------------------------------------------------*/
+ocas_return_value_T msvm_ocas_solver(
+            float64_t C,
+            float64_t *data_y,
+            uint32_t nY,
+            uint32_t nData,
+            float64_t TolRel,
+            float64_t TolAbs,
+            float64_t QPBound,
+            float64_t MaxTime,
+            uint32_t _BufSize,
+            uint8_t Method,
+            void (*compute_W)(float64_t*, float64_t*, float64_t*, uint32_t, void*),
+            float64_t (*update_W)(float64_t, void*),
+            int (*add_new_cut)(float64_t*, uint32_t*, uint32_t, void*),
+            int (*compute_output)(float64_t*, void* ),
+            int (*sort)(float64_t*, float64_t*, uint32_t),
+			void (*ocas_print)(ocas_return_value_T),
+			void* user_data)
+{
+  ocas_return_value_T ocas={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+  float64_t *b, *alpha, *diag_H;
+  float64_t *output, *old_output;
+  float64_t xi, sq_norm_W, QPSolverTolRel, QPSolverTolAbs, dot_prod_WoldW, sq_norm_oldW;
+  float64_t A0, B0, t, t1, t2, R, tmp, element_b, x;
+  float64_t *A, *B, *theta, *Theta, *sortedA, *Add;
+  float64_t start_time, ocas_start_time, grad_sum, grad, min_x = 0, old_x, old_grad;
+  uint32_t i, y, y2, ypred = 0, *new_cut, cnt1, cnt2, j, nSortedA, idx;
+  uint32_t *I;
+  uint8_t S = 1;
+  libqp_state_T qp_exitflag;
+
+  ocas_start_time = get_time();
+  ocas.qp_solver_time = 0;
+  ocas.output_time = 0;
+  ocas.sort_time = 0;
+  ocas.add_time = 0;
+  ocas.w_time = 0;
+  ocas.print_time = 0;
+
+  BufSize = _BufSize;
+
+  QPSolverTolRel = TolRel*0.5;
+  QPSolverTolAbs = TolAbs*0.5;
+
+  H=NULL;
+  b=NULL;
+  alpha=NULL;
+  new_cut=NULL;
+  I=NULL;
+  diag_H=NULL;
+  output=NULL;
+  old_output=NULL;
+  A = NULL;
+  B = NULL;
+  theta = NULL;
+  Theta = NULL;
+  sortedA = NULL;
+  Add = NULL;
+
+  /* Hessian matrix contains dot product of normal vectors of selected cutting planes */
+  H = (float64_t*)LIBOCAS_CALLOC(BufSize*BufSize, float64_t);
+  if(H == NULL)
+  {
+	  ocas.exitflag=-2;
+	  goto cleanup;
+  }
+
+  /* bias of cutting planes */
+  b = (float64_t*)LIBOCAS_CALLOC(BufSize, float64_t);
+  if(b == NULL)
+  {
+	  ocas.exitflag=-2;
+	  goto cleanup;
+  }
+
+  alpha = (float64_t*)LIBOCAS_CALLOC(BufSize, float64_t);
+  if(alpha == NULL)
+  {
+	  ocas.exitflag=-2;
+	  goto cleanup;
+  }
+
+  /* indices of examples which define a new cut */
+  new_cut = (uint32_t*)LIBOCAS_CALLOC(nData, uint32_t);
+  if(new_cut == NULL)
+  {
+	  ocas.exitflag=-2;
+	  goto cleanup;
+  }
+
+  I = (uint32_t*)LIBOCAS_CALLOC(BufSize, uint32_t);
+  if(I == NULL)
+  {
+	  ocas.exitflag=-2;
+	  goto cleanup;
+  }
+
+  for(i=0; i< BufSize; i++)
+    I[i] = 1;
+
+  diag_H = (float64_t*)LIBOCAS_CALLOC(BufSize, float64_t);
+  if(diag_H == NULL)
+  {
+	  ocas.exitflag=-2;
+	  goto cleanup;
+  }
+
+  output = (float64_t*)LIBOCAS_CALLOC(nData*nY, float64_t);
+  if(output == NULL)
+  {
+	  ocas.exitflag=-2;
+	  goto cleanup;
+  }
+
+  old_output = (float64_t*)LIBOCAS_CALLOC(nData*nY, float64_t);
+  if(old_output == NULL)
+  {
+	  ocas.exitflag=-2;
+	  goto cleanup;
+  }
+
+  /* auxciliary variables used in the linesearch */
+  A = (float64_t*)LIBOCAS_CALLOC(nData*nY, float64_t);
+  if(A == NULL)
+  {
+	  ocas.exitflag=-2;
+	  goto cleanup;
+  }
+
+  B = (float64_t*)LIBOCAS_CALLOC(nData*nY, float64_t);
+  if(B == NULL)
+  {
+	  ocas.exitflag=-2;
+	  goto cleanup;
+  }
+
+  theta = (float64_t*)LIBOCAS_CALLOC(nY, float64_t);
+  if(theta == NULL)
+  {
+	  ocas.exitflag=-2;
+	  goto cleanup;
+  }
+
+  sortedA = (float64_t*)LIBOCAS_CALLOC(nY, float64_t);
+  if(sortedA == NULL)
+  {
+	  ocas.exitflag=-2;
+	  goto cleanup;
+  }
+
+  Theta = (float64_t*)LIBOCAS_CALLOC(nData*nY, float64_t);
+  if(Theta == NULL)
+  {
+	  ocas.exitflag=-2;
+	  goto cleanup;
+  }
+
+  Add = (float64_t*)LIBOCAS_CALLOC(nData*nY, float64_t);
+  if(Add == NULL)
+  {
+	  ocas.exitflag=-2;
+	  goto cleanup;
+  }
+
+  /* Set initial values*/
+  ocas.nCutPlanes = 0;
+  ocas.exitflag = 0;
+  ocas.nIter = 0;
+  ocas.Q_D = 0;
+  ocas.trn_err = nData;
+  R = (float64_t)nData;
+  sq_norm_W = 0;
+  element_b = (float64_t)nData;
+  ocas.Q_P = 0.5*sq_norm_W + C*R;
+
+  /* initial cutting plane */
+  for(i=0; i < nData; i++)
+  {
+    y2 = (uint32_t)data_y[i];
+
+    if(y2 > 0)
+      new_cut[i] = 0;
+    else
+      new_cut[i] = 1;
+
+  }
+
+  ocas.ocas_time = get_time() - ocas_start_time;
+
+  start_time = get_time();
+  ocas_print(ocas);
+  ocas.print_time += get_time() - start_time;
+
+  /* main loop of the OCAS */
+  while( ocas.exitflag == 0 )
+  {
+    ocas.nIter++;
+
+    /* append a new cut to the buffer and update H */
+    b[ocas.nCutPlanes] = -(float64_t)element_b;
+
+    start_time = get_time();
+
+    if(add_new_cut( &H[LIBOCAS_INDEX(0,ocas.nCutPlanes,BufSize)], new_cut, ocas.nCutPlanes, user_data ) != 0)
+    {
+	  ocas.exitflag=-2;
+	  goto cleanup;
+    }
+
+    ocas.add_time += get_time() - start_time;
+
+    /* copy newly appended row: H(ocas.nCutPlanes,ocas.nCutPlanes,1:ocas.nCutPlanes-1) = H(1:ocas.nCutPlanes-1:ocas.nCutPlanes)' */
+    diag_H[ocas.nCutPlanes] = H[LIBOCAS_INDEX(ocas.nCutPlanes,ocas.nCutPlanes,BufSize)];
+    for(i=0; i < ocas.nCutPlanes; i++)
+    {
+      H[LIBOCAS_INDEX(ocas.nCutPlanes,i,BufSize)] = H[LIBOCAS_INDEX(i,ocas.nCutPlanes,BufSize)];
+    }
+
+    ocas.nCutPlanes++;
+
+    /* call inner QP solver */
+    start_time = get_time();
+
+    qp_exitflag = libqp_splx_solver(&get_col, diag_H, b, &C, I, &S, alpha,
+                                  ocas.nCutPlanes, QPSolverMaxIter, QPSolverTolAbs, QPSolverTolRel, -LIBOCAS_PLUS_INF,0);
+
+    ocas.qp_exitflag = qp_exitflag.exitflag;
+
+    ocas.qp_solver_time += get_time() - start_time;
+    ocas.Q_D = -qp_exitflag.QP;
+
+    ocas.nNZAlpha = 0;
+    for(i=0; i < ocas.nCutPlanes; i++)
+      if( alpha[i] != 0) ocas.nNZAlpha++;
+
+    sq_norm_oldW = sq_norm_W;
+    start_time = get_time();
+    compute_W( &sq_norm_W, &dot_prod_WoldW, alpha, ocas.nCutPlanes, user_data );
+    ocas.w_time += get_time() - start_time;
+
+    /* select a new cut */
+    switch( Method )
+    {
+      /* cutting plane algorithm implemented in SVMperf and BMRM */
+      case 0:
+
+        start_time = get_time();
+        if( compute_output( output, user_data ) != 0)
+        {
+          ocas.exitflag=-2;
+          goto cleanup;
+        }
+        ocas.output_time += get_time()-start_time;
+
+        /* the following loop computes: */
+        element_b = 0.0;    /*  element_b = R(old_W) - g'*old_W */
+        R = 0;              /*  R(W) = sum_i max_y ( [[y != y_i]] + (w_y- w_y_i)'*x_i )    */
+        ocas.trn_err = 0;   /*  trn_err = sum_i [[y != y_i ]]                              */
+                            /* new_cut[i] = argmax_i ( [[y != y_i]] + (w_y- w_y_i)'*x_i )  */
+        for(i=0; i < nData; i++)
+        {
+          y2 = (uint32_t)data_y[i];
+
+          for(xi=-LIBOCAS_PLUS_INF, y=0; y < nY; y++)
+          {
+            if(y2 != y && xi < output[LIBOCAS_INDEX(y,i,nY)])
+            {
+              xi = output[LIBOCAS_INDEX(y,i,nY)];
+              ypred = y;
+            }
+          }
+
+          if(xi >= output[LIBOCAS_INDEX(y2,i,nY)])
+            ocas.trn_err ++;
+
+          xi = LIBOCAS_MAX(0,xi+1-output[LIBOCAS_INDEX(y2,i,nY)]);
+          R += xi;
+          if(xi > 0)
+          {
+            element_b++;
+            new_cut[i] = ypred;
+          }
+          else
+            new_cut[i] = y2;
+        }
+
+        ocas.Q_P = 0.5*sq_norm_W + C*R;
+
+        ocas.ocas_time = get_time() - ocas_start_time;
+
+        start_time = get_time();
+        ocas_print(ocas);
+        ocas.print_time += get_time() - start_time;
+
+        break;
+
+      /* The OCAS solver */
+      case 1:
+        sg_memcpy( old_output, output, sizeof(float64_t)*nData*nY );
+
+        start_time = get_time();
+        if( compute_output( output, user_data ) != 0)
+        {
+          ocas.exitflag=-2;
+          goto cleanup;
+        }
+        ocas.output_time += get_time()-start_time;
+
+        A0 = sq_norm_W - 2*dot_prod_WoldW + sq_norm_oldW;
+        B0 = dot_prod_WoldW - sq_norm_oldW;
+
+        for(i=0; i < nData; i++)
+        {
+          y2 = (uint32_t)data_y[i];
+
+          for(y=0; y < nY; y++)
+          {
+            A[LIBOCAS_INDEX(y,i,nY)] = C*(output[LIBOCAS_INDEX(y,i,nY)] - old_output[LIBOCAS_INDEX(y,i,nY)]
+                                       + old_output[LIBOCAS_INDEX(y2,i,nY)] - output[LIBOCAS_INDEX(y2,i,nY)]);
+            B[LIBOCAS_INDEX(y,i,nY)] = C*(old_output[LIBOCAS_INDEX(y,i,nY)] - old_output[LIBOCAS_INDEX(y2,i,nY)]
+                                       + (float64_t)(y != y2));
+          }
+        }
+
+        /* linesearch */
+/*      new_x = msvm_linesearch_mex(A0,B0,AA*C,BB*C);*/
+
+        grad_sum = B0;
+        cnt1 = 0;
+        cnt2 = 0;
+        for(i=0; i < nData; i++)
+        {
+          findactive(theta,sortedA,&nSortedA,&A[i*nY],&B[i*nY],nY,sort);
+
+          idx = 0;
+          while( idx < nSortedA-1 && theta[idx] < 0 )
+            idx++;
+
+          grad_sum += sortedA[idx];
+
+          for(j=idx; j < nSortedA-1; j++)
+          {
+            Theta[cnt1] = theta[j];
+            cnt1++;
+          }
+
+          for(j=idx+1; j < nSortedA; j++)
+          {
+            Add[cnt2] = -sortedA[j-1]+sortedA[j];
+            cnt2++;
+          }
+        }
+
+        start_time = get_time();
+        sort(Theta,Add,cnt1);
+        ocas.sort_time += get_time() - start_time;
+
+        grad = grad_sum;
+        if(grad >= 0)
+        {
+          min_x = 0;
+        }
+        else
+        {
+          old_x = 0;
+          old_grad = grad;
+
+          for(i=0; i < cnt1; i++)
+          {
+            x = Theta[i];
+
+            grad = x*A0 + grad_sum;
+
+            if(grad >=0)
+            {
+
+              min_x = (grad*old_x - old_grad*x)/(grad - old_grad);
+
+              break;
+            }
+            else
+            {
+              grad_sum = grad_sum + Add[i];
+
+              grad = x*A0 + grad_sum;
+              if( grad >= 0)
+              {
+                min_x = x;
+                break;
+              }
+            }
+
+            old_grad = grad;
+            old_x = x;
+          }
+        }
+        /* end of the linesearch which outputs min_x */
+
+        t = min_x;
+        t1 = t;                /* new (best so far) W */
+        t2 = t+(1.0-t)*MU;   /* new cutting plane */
+        /*        t2 = t+(1.0-t)/10.0;    */
+
+        /* update W to be the best so far solution */
+        sq_norm_W = update_W( t1, user_data );
+
+        /* the following code  computes a new cutting plane: */
+        element_b = 0.0;    /*  element_b = R(old_W) - g'*old_W */
+                            /* new_cut[i] = argmax_i ( [[y != y_i]] + (w_y- w_y_i)'*x_i )  */
+        for(i=0; i < nData; i++)
+        {
+          y2 = (uint32_t)data_y[i];
+
+          for(xi=-LIBOCAS_PLUS_INF, y=0; y < nY; y++)
+          {
+            tmp = old_output[LIBOCAS_INDEX(y,i,nY)]*(1-t2) + t2*output[LIBOCAS_INDEX(y,i,nY)];
+            if(y2 != y && xi < tmp)
+            {
+              xi = tmp;
+              ypred = y;
+            }
+          }
+
+          tmp = old_output[LIBOCAS_INDEX(y2,i,nY)]*(1-t2) + t2*output[LIBOCAS_INDEX(y2,i,nY)];
+          xi = LIBOCAS_MAX(0,xi+1-tmp);
+          if(xi > 0)
+          {
+            element_b++;
+            new_cut[i] = ypred;
+          }
+          else
+            new_cut[i] = y2;
+        }
+
+        /* compute Risk, class. error and update outputs to correspond to the new W */
+        ocas.trn_err = 0;   /*  trn_err = sum_i [[y != y_i ]]                       */
+        R = 0;
+        for(i=0; i < nData; i++)
+        {
+          y2 = (uint32_t)data_y[i];
+
+          for(tmp=-LIBOCAS_PLUS_INF, y=0; y < nY; y++)
+          {
+            output[LIBOCAS_INDEX(y,i,nY)] = old_output[LIBOCAS_INDEX(y,i,nY)]*(1-t1) + t1*output[LIBOCAS_INDEX(y,i,nY)];
+
+            if(y2 != y && tmp < output[LIBOCAS_INDEX(y,i,nY)])
+            {
+              ypred = y;
+              tmp = output[LIBOCAS_INDEX(y,i,nY)];
+            }
+          }
+
+          R += LIBOCAS_MAX(0,1+tmp - output[LIBOCAS_INDEX(y2,i,nY)]);
+          if( tmp >= output[LIBOCAS_INDEX(y2,i,nY)])
+            ocas.trn_err ++;
+        }
+
+        ocas.Q_P = 0.5*sq_norm_W + C*R;
+
+
+        /* get time and print status */
+        ocas.ocas_time = get_time() - ocas_start_time;
+
+        start_time = get_time();
+        ocas_print(ocas);
+        ocas.print_time += get_time() - start_time;
+
+        break;
+
+    }
+
+    /* Stopping conditions */
+    if( ocas.Q_P - ocas.Q_D <= TolRel*LIBOCAS_ABS(ocas.Q_P)) ocas.exitflag = 1;
+    if( ocas.Q_P - ocas.Q_D <= TolAbs) ocas.exitflag = 2;
+    if( ocas.Q_P <= QPBound) ocas.exitflag = 3;
+    if( MaxTime > 0 && ocas.ocas_time >= MaxTime) ocas.exitflag = 4;
+    if(ocas.nCutPlanes >= BufSize) ocas.exitflag = -1;
+
+  } /* end of the main loop */
+
+cleanup:
+
+  LIBOCAS_FREE(H);
+  LIBOCAS_FREE(b);
+  LIBOCAS_FREE(alpha);
+  LIBOCAS_FREE(new_cut);
+  LIBOCAS_FREE(I);
+  LIBOCAS_FREE(diag_H);
+  LIBOCAS_FREE(output);
+  LIBOCAS_FREE(old_output);
+  LIBOCAS_FREE(A);
+  LIBOCAS_FREE(B);
+  LIBOCAS_FREE(theta);
+  LIBOCAS_FREE(Theta);
+  LIBOCAS_FREE(sortedA);
+  LIBOCAS_FREE(Add);
+
+  ocas.ocas_time = get_time() - ocas_start_time;
+
+  return(ocas);
+}
+}
diff --git a/src/shogun/lib/external/libocas.h b/src/shogun/lib/external/libocas.h
new file mode 100644
index 00000000000..9cc6a93c722
--- /dev/null
+++ b/src/shogun/lib/external/libocas.h
@@ -0,0 +1,127 @@
+/*
+ * This software is distributed under BSD 3-clause license (see LICENSE file).
+ *
+ * Authors: Vojtech Franc, Soeren Sonnenburg
+ */
+
+#include <shogun/lib/config.h>
+#include <shogun/lib/common.h>
+
+#ifndef libocas_h
+#define libocas_h
+#ifndef DOXYGEN_SHOULD_SKIP_THIS
+namespace shogun
+{
+#define LIBOCAS_PLUS_INF (-log(0.0))
+#define LIBOCAS_CALLOC(x,y) SG_CALLOC(y,x)
+#define LIBOCAS_FREE(x) SG_FREE(x)
+#define LIBOCAS_INDEX(ROW,COL,NUM_ROWS) ((COL)*(NUM_ROWS)+(ROW))
+#define LIBOCAS_MIN(A,B) ((A) > (B) ? (B) : (A))
+#define LIBOCAS_MAX(A,B) ((A) < (B) ? (B) : (A))
+#define LIBOCAS_ABS(A) ((A) < 0 ? -(A) : (A))
+
+typedef struct {
+  uint32_t nIter;        /* number of iterations */
+  uint32_t nCutPlanes;   /* number of cutitng buffered planes */
+  uint32_t nNZAlpha;     /* number of non-zero Lagrangeans (effective number of CPs) */
+  uint32_t trn_err;      /* number of training errors */
+  float64_t Q_P;            /* primal objective value */
+  float64_t Q_D;            /* dual objective value */
+  float64_t output_time;    /* time spent in computing outputs */
+  float64_t sort_time;      /* time spent in sorting */
+  float64_t add_time;       /* time spent in adding examples to compute cutting planes */
+  float64_t w_time;         /* time spent in computing parameter vector  */
+  float64_t qp_solver_time; /* time spent in inner QP solver  */
+  float64_t ocas_time;      /* total time spent in svm_ocas_solver */
+  float64_t print_time;     /* time spent in ocas_print function */
+  int8_t qp_exitflag;    /* exitflag from the last call of the inner QP solver */
+  int8_t exitflag;       /*  1 .. ocas.Q_P - ocas.Q_D <= TolRel*ABS(ocas.Q_P)
+                             2 .. ocas.Q_P - ocas.Q_D <= TolAbs
+                             3 .. ocas.Q_P <= QPBound
+                             4 .. optimization time >= MaxTime
+                            -1 .. ocas.nCutPlanes >= BufSize
+                            -2 .. not enough memory for the solver */
+} ocas_return_value_T;
+
+/* binary linear SVM solver */
+ocas_return_value_T svm_ocas_solver(
+         float64_t C,            /* regularizarion constant */
+         uint32_t nData,      /* number of exmaples */
+         float64_t TolRel,       /* halts if 1-Q_P/Q_D <= TolRel */
+         float64_t TolAbs,       /* halts if Q_P-Q_D <= TolRel */
+         float64_t QPBound,      /* halts if QP <= QPBound */
+         float64_t MaxTime,      /* maximal time in seconds spent in optmization */
+         uint32_t BufSize,    /* maximal number of buffered cutting planes  */
+         uint8_t Method,      /* 0..standard CP (SVM-Perf,BMRM), 1..OCAS */
+         void (*compute_W)(float64_t*, float64_t*, float64_t*, uint32_t, void*),
+         float64_t (*update_W)(float64_t, void*),
+         int (*add_new_cut)(float64_t*, uint32_t*, uint32_t, uint32_t, void*),
+         int (*compute_output)( float64_t*, void* ),
+         int (*sort)(float64_t*, float64_t*, uint32_t),
+         void (*ocas_print)(ocas_return_value_T),
+         void* user_data);
+
+/* binary linear SVM solver which allows using different C for each example*/
+ocas_return_value_T svm_ocas_solver_difC(
+         float64_t *C,           /* regularizarion constants for each example */
+         uint32_t nData,      /* number of exmaples */
+         float64_t TolRel,       /* halts if 1-Q_P/Q_D <= TolRel */
+         float64_t TolAbs,       /* halts if Q_P-Q_D <= TolRel */
+         float64_t QPBound,      /* halts if QP <= QPBound */
+         float64_t MaxTime,      /* maximal time in seconds spent in optmization */
+         uint32_t BufSize,    /* maximal number of buffered cutting planes  */
+         uint8_t Method,      /* 0..standard CP (SVM-Perf,BMRM), 1..OCAS */
+         void (*compute_W)(float64_t*, float64_t*, float64_t*, uint32_t, void*),
+         float64_t (*update_W)(float64_t, void*),
+         int (*add_new_cut)(float64_t*, uint32_t*, uint32_t, uint32_t, void*),
+         int (*compute_output)( float64_t*, void* ),
+         int (*sort)(float64_t*, float64_t*, uint32_t),
+         void (*ocas_print)(ocas_return_value_T),
+         void* user_data);
+
+/* multi-class (Singer-Crammer formulation) linear SVM solver */
+ocas_return_value_T msvm_ocas_solver(
+            float64_t C,
+            float64_t *data_y,
+            uint32_t nY,
+            uint32_t nData,
+            float64_t TolRel,
+            float64_t TolAbs,
+            float64_t QPBound,
+            float64_t MaxTime,
+            uint32_t _BufSize,
+            uint8_t Method,
+            void (*compute_W)(float64_t*, float64_t*, float64_t*, uint32_t, void*),
+            float64_t (*update_W)(float64_t, void*),
+            int (*add_new_cut)(float64_t*, uint32_t*, uint32_t, void*),
+            int (*compute_output)(float64_t*, void* ),
+            int (*sort)(float64_t*, float64_t*, uint32_t),
+			void (*ocas_print)(ocas_return_value_T),
+			void* user_data);
+
+
+/* binary linear SVM solver */
+ocas_return_value_T svm_ocas_solver_nnw(
+         float64_t C,            /* regularizarion constant */
+         uint32_t nData,      /* number of exmaples */
+         uint32_t num_nnw,    /* number of components of W which must non-negative*/
+         uint32_t* nnw_idx,   /* indices of W which must be non-negative */
+         float64_t TolRel,       /* halts if 1-Q_P/Q_D <= TolRel */
+         float64_t TolAbs,       /* halts if Q_P-Q_D <= TolRel */
+         float64_t QPBound,      /* halts if QP <= QPBound */
+         float64_t MaxTime,      /* maximal time in seconds spent in optmization */
+         uint32_t BufSize,    /* maximal number of buffered cutting planes  */
+         uint8_t Method,      /* 0..standard CP (SVM-Perf,BMRM), 1..OCAS */
+         int (*add_pw_constr)(uint32_t, uint32_t, void*),
+         void (*clip_neg_w)(uint32_t, uint32_t*, void*),
+         void (*compute_W)(float64_t*, float64_t*, float64_t*, uint32_t, void*),
+         float64_t (*update_W)(float64_t, void*),
+         int (*add_new_cut)(float64_t*, uint32_t*, uint32_t, uint32_t, void*),
+         int (*compute_output)( float64_t*, void* ),
+         int (*sort)(float64_t*, float64_t*, uint32_t),
+         void (*ocas_print)(ocas_return_value_T),
+         void* user_data);
+
+}
+#endif // DOXYGEN_SHOULD_SKIP_THIS
+#endif /* libocas_h */
diff --git a/src/shogun/lib/external/libocas_common.h b/src/shogun/lib/external/libocas_common.h
new file mode 100644
index 00000000000..e7b9933b830
--- /dev/null
+++ b/src/shogun/lib/external/libocas_common.h
@@ -0,0 +1,19 @@
+/*
+ * This software is distributed under BSD 3-clause license (see LICENSE file).
+ *
+ * Authors: Vojtech Franc, Soeren Sonnenburg
+ */
+
+#include <shogun/lib/config.h>
+#include <shogun/mathematics/Math.h>
+#include <shogun/io/SGIO.h>
+
+namespace shogun
+{
+#define OCAS_PLUS_INF CMath::INFTY
+#define OCAS_CALLOC(...) calloc(__VA_ARGS__)
+#define OCAS_FREE(...) SG_FREE(__VA_ARGS__)
+
+#define INDEX2(ROW,COL,NUM_ROWS) ((COL)*(NUM_ROWS)+(ROW))
+}
+
diff --git a/src/shogun/lib/external/libqp.h b/src/shogun/lib/external/libqp.h
new file mode 100644
index 00000000000..f5bd68909df
--- /dev/null
+++ b/src/shogun/lib/external/libqp.h
@@ -0,0 +1,73 @@
+/*
+ * This software is distributed under BSD 3-clause license (see LICENSE file).
+ *
+ * Authors: Vojtech Franc
+ */
+
+#ifndef libqp_h
+#define libqp_h
+
+#include <shogun/lib/config.h>
+#include <math.h>
+
+#include <shogun/lib/common.h>
+namespace shogun
+{
+#define LIBQP_PLUS_INF (-log(0.0))
+#define LIBQP_CALLOC(x,y) SG_CALLOC(y,x)
+#define LIBQP_FREE(x) SG_FREE(x)
+#define LIBQP_INDEX(ROW,COL,NUM_ROWS) ((COL)*(NUM_ROWS)+(ROW))
+#define LIBQP_MIN(A,B) ((A) > (B) ? (B) : (A))
+#define LIBQP_MAX(A,B) ((A) < (B) ? (B) : (A))
+#define LIBQP_ABS(A) ((A) < 0 ? -(A) : (A))
+
+#ifndef DOXYGEN_SHOULD_SKIP_THIS
+/** QP solver return value */
+typedef struct {
+  /** number of iterations */
+  uint32_t nIter;
+  /** primal objective value */
+  float64_t QP;
+  /** dual objective value */
+  float64_t QD;
+  /** exit flag */
+  int8_t exitflag;      /* -1 ... not enough memory
+                            0 ... nIter >= MaxIter
+                            1 ... QP - QD <= TolRel*ABS(QP)
+                            2 ... QP - QD <= TolAbs
+                            3 ... QP <= QP_TH
+                            4 ... eps-KKT conditions satisfied */
+} libqp_state_T;
+#endif
+
+/** QP solver for tasks with simplex constraints */
+libqp_state_T libqp_splx_solver(const float64_t* (*get_col)(uint32_t),
+                  float64_t *diag_H,
+                  float64_t *f,
+                  float64_t *b,
+                  uint32_t *Ivector,
+                  uint8_t *S,
+                  float64_t *x,
+                  uint32_t n,
+                  uint32_t MaxIter,
+                  float64_t TolAbs,
+                  float64_t TolRel,
+                  float64_t QP_TH,
+                  void (*print_state)(libqp_state_T state));
+
+/** Generalized SMO algorithm */
+libqp_state_T libqp_gsmo_solver(const float64_t* (*get_col)(uint32_t),
+            float64_t *diag_H,
+            float64_t *f,
+            float64_t *a,
+            float64_t b,
+            float64_t *LB,
+            float64_t *UB,
+            float64_t *x,
+            uint32_t n,
+            uint32_t MaxIter,
+            float64_t TolKKT,
+            void (*print_state)(libqp_state_T state));
+
+}
+#endif /* libqp_h */
diff --git a/src/shogun/lib/external/libqp_gsmo.cpp b/src/shogun/lib/external/libqp_gsmo.cpp
new file mode 100644
index 00000000000..87619042e59
--- /dev/null
+++ b/src/shogun/lib/external/libqp_gsmo.cpp
@@ -0,0 +1,251 @@
+/*-----------------------------------------------------------------------
+ * libqp_gsmo.c: implementation of the Generalized SMO algorithm.
+ *
+ * DESCRIPTION
+ *  The library provides function which solves the following instance of
+ *  a convex Quadratic Programming task:
+ *
+ *  min QP(x) := 0.5*x'*H*x + f'*x
+ *   x
+ *
+ *   s.t.    a'*x = b
+ *           LB[i] <= x[i] <= UB[i]   for all i=1..n
+ *
+ * A precision of the found solution is controlled by the input argument
+ * TolKKT which defines tightness of the relaxed Karush-Kuhn-Tucker
+ * stopping conditions.
+ *
+ * INPUT ARGUMENTS
+ *  get_col   function which returns pointer to the i-th column of H.
+ *  diag_H [float64_t n x 1] vector containing values on the diagonal of H.
+ *  f [float64_t n x 1] vector.
+ *  a [float64_t n x 1] Vector which must not contain zero entries.
+ *  b [float64_t 1 x 1] Scalar.
+ *  LB [float64_t n x 1] Lower bound; -inf is allowed.
+ *  UB [float64_t n x 1] Upper bound; inf is allowed.
+ *  x [float64_t n x 1] solution vector; must be feasible.
+ *  n [uint32_t 1 x 1] dimension of H.
+ *  MaxIter [uint32_t 1 x 1] max number of iterations.
+ *  TolKKT [float64_t 1 x 1] Tightness of KKT stopping conditions.
+ *  print_state  print function; if == NULL it is not called.
+ *
+ * RETURN VALUE
+ *  structure [libqp_state_T]
+ *   .QP [1x1] Primal objective value.
+ *   .exitflag [1 x 1] Indicates which stopping condition was used:
+ *     -3  ... initial solution vector does not satisfy equality constraint
+ *     -2  ... initial solution vector does not satisfy bounds
+ *     -1  ... not enough memory
+ *      0  ... Maximal number of iterations reached: nIter >= MaxIter.
+ *      4  ... Relaxed KKT conditions satisfied.
+ *   .nIter [1x1] Number of iterations.
+ *
+ * REFERENCE
+ *  S.S. Keerthi, E.G. Gilbert. Convergence of a generalized SMO algorithm
+ *   for SVM classier design. Technical Report CD-00-01, Control Division,
+ *   Dept. of Mechanical and Production Engineering, National University
+ *   of Singapore, 2000.
+ *   http://citeseer.ist.psu.edu/keerthi00convergence.html
+ *
+ *
+ * This software is distributed under BSD 3-clause license (see LICENSE file).
+ *
+ * Authors: Vojtech Franc, Soeren Sonnenburg
+ *-------------------------------------------------------------------- */
+
+#include <shogun/lib/config.h>
+
+#include <math.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <limits.h>
+
+#include <shogun/lib/common.h>
+#include <shogun/lib/common.h>
+#include <shogun/lib/external/libqp.h>
+
+namespace shogun
+{
+
+libqp_state_T libqp_gsmo_solver(const float64_t* (*get_col)(uint32_t),
+                                float64_t *diag_H,
+                                float64_t *f,
+                                float64_t *a,
+                                float64_t b,
+                                float64_t *LB,
+                                float64_t *UB,
+                                float64_t *x,
+                                uint32_t n,
+                                uint32_t MaxIter,
+                                float64_t TolKKT,
+                                void (*print_state)(libqp_state_T state))
+{
+	float64_t *col_u;
+	float64_t *col_v;
+	float64_t *Nabla;
+	float64_t minF_up;
+	float64_t maxF_low;
+	float64_t tau;
+	float64_t F_i;
+	float64_t tau_ub, tau_lb;
+	uint32_t i, j;
+	uint32_t u=0, v=0;
+	libqp_state_T state;
+	float64_t atx = 0.0;
+
+	Nabla = NULL;
+
+	/* ------------------------------------------------------------ */
+	/* Initialization                                               */
+	/* ------------------------------------------------------------ */
+
+	// check bounds of initial guess
+	for (i=0; i<n; i++)
+	{
+		if (x[i]>UB[i])
+		{
+			state.exitflag = -2;
+			goto cleanup;
+		}
+		if (x[i]<LB[i])
+		{
+			state.exitflag = -2;
+			goto cleanup;
+		}
+	}
+
+	// check equality constraint
+	for (i=0; i<n; i++)
+		atx += a[i]*x[i];
+	if (fabs(b-atx)>1e-9)
+	{
+		printf("%f \ne %f\n",b,atx);
+		state.exitflag = -3;
+		goto cleanup;
+	}
+
+	/* Nabla = H*x + f is gradient*/
+	Nabla = (float64_t*)LIBQP_CALLOC(n, float64_t);
+	if( Nabla == NULL )
+	{
+		state.exitflag=-1;
+		goto cleanup;
+	}
+
+	/* compute gradient */
+	for( i=0; i < n; i++ )
+	{
+		Nabla[i] += f[i];
+		if( x[i] != 0 ) {
+			col_u = (float64_t*)get_col(i);
+			for( j=0; j < n; j++ ) {
+				Nabla[j] += col_u[j]*x[i];
+			}
+		}
+	}
+
+	if( print_state != NULL)
+	{
+		state.QP = 0;
+		for(i = 0; i < n; i++ )
+			state.QP += 0.5*(x[i]*Nabla[i]+x[i]*f[i]);
+
+		print_state( state );
+	}
+
+
+	/* ------------------------------------------------------------ */
+	/* Main optimization loop                                       */
+	/* ------------------------------------------------------------ */
+
+	state.nIter = 0;
+	state.exitflag = 100;
+	while( state.exitflag == 100 )
+	{
+		state.nIter ++;
+
+		/* find the most violating pair of variables */
+		minF_up = LIBQP_PLUS_INF;
+		maxF_low = -LIBQP_PLUS_INF;
+		for(i = 0; i < n; i++ )
+		{
+
+			F_i = Nabla[i]/a[i];
+
+			if(LB[i] < x[i] && x[i] < UB[i])
+			{ /* i is from I_0 */
+				if( minF_up > F_i) { minF_up = F_i; u = i; }
+				if( maxF_low < F_i) { maxF_low = F_i; v = i; }
+			}
+			else if((a[i] > 0 && x[i] == LB[i]) || (a[i] < 0 && x[i] == UB[i]))
+			{ /* i is from I_1 or I_2 */
+				if( minF_up > F_i) { minF_up = F_i; u = i; }
+			}
+			else if((a[i] > 0 && x[i] == UB[i]) || (a[i] < 0 && x[i] == LB[i]))
+			{ /* i is from I_3 or I_4 */
+				if( maxF_low < F_i) { maxF_low = F_i; v = i; }
+			}
+		}
+
+		/* check KKT conditions */
+		if( maxF_low - minF_up <= TolKKT )
+			state.exitflag = 4;
+		else
+		{
+			/* SMO update of the most violating pair */
+			col_u = (float64_t*)get_col(u);
+			col_v = (float64_t*)get_col(v);
+
+			if( a[u] > 0 )
+			{ tau_lb = (LB[u]-x[u])*a[u]; tau_ub = (UB[u]-x[u])*a[u]; }
+			else
+			{ tau_ub = (LB[u]-x[u])*a[u]; tau_lb = (UB[u]-x[u])*a[u]; }
+
+			if( a[v] > 0 )
+			{ tau_lb = LIBQP_MAX(tau_lb,(x[v]-UB[v])*a[v]); tau_ub = LIBQP_MIN(tau_ub,(x[v]-LB[v])*a[v]); }
+			else
+			{ tau_lb = LIBQP_MAX(tau_lb,(x[v]-LB[v])*a[v]); tau_ub = LIBQP_MIN(tau_ub,(x[v]-UB[v])*a[v]); }
+
+			tau = (Nabla[v]/a[v]-Nabla[u]/a[u])/
+				(diag_H[u]/(a[u]*a[u]) + diag_H[v]/(a[v]*a[v]) - 2*col_u[v]/(a[u]*a[v]));
+
+			tau = LIBQP_MIN(LIBQP_MAX(tau,tau_lb),tau_ub);
+
+			x[u] += tau/a[u];
+			x[v] -= tau/a[v];
+
+			/* update Nabla */
+			for(i = 0; i < n; i++ )
+				Nabla[i] += col_u[i]*tau/a[u] - col_v[i]*tau/a[v];
+
+		}
+
+		if( state.nIter >= MaxIter )
+			state.exitflag = 0;
+
+		if( print_state != NULL)
+		{
+			state.QP = 0;
+			for(i = 0; i < n; i++ )
+				state.QP += 0.5*(x[i]*Nabla[i]+x[i]*f[i]);
+
+			print_state( state );
+		}
+
+	}
+
+	/* compute primal objective value */
+	state.QP = 0;
+	for(i = 0; i < n; i++ )
+		state.QP += 0.5*(x[i]*Nabla[i]+x[i]*f[i]);
+
+cleanup:
+
+	LIBQP_FREE(Nabla);
+
+	return( state );
+}
+
+} /* shogun namespace */
diff --git a/src/shogun/lib/external/libqp_splx.cpp b/src/shogun/lib/external/libqp_splx.cpp
new file mode 100644
index 00000000000..17e8ff553c6
--- /dev/null
+++ b/src/shogun/lib/external/libqp_splx.cpp
@@ -0,0 +1,408 @@
+/*-----------------------------------------------------------------------
+ * libqp_splx.c: solver for Quadratic Programming task with
+ * simplex constraints.
+ *
+ * DESCRIPTION
+ *  The library provides function which solves the following instance of
+ *  a convex Quadratic Programmin task:
+ *
+ *   min QP(x):= 0.5*x'*H*x + f'*x
+ *    x
+ *
+ * subject to:
+ *   sum_{i in I_k} x[i] == b[k]  for all k such that S[k] == 0
+ *   sum_{i in I_k} x[i] <= b[k]  for all k such that S[k] == 1
+ *                             x(i) >= 0 for all i=1:n
+ *
+ *  where I_k = { i | I[i] == k}, k={1,...,m}.
+ *
+ * A precision of the found solution is controled by the input argumens
+ * MaxIter, TolAbs, QP_TH and MaxIter which define the stopping conditions:
+ *
+ *  nIter >= MaxIter     ->  exitflag = 0   Number of iterations
+ *  QP-QD <= TolAbs      ->  exitflag = 1   Abs. tolerance (duality gap)
+ *  QP-QD <= QP*TolRel   ->  exitflag = 2   Relative tolerance
+ *  QP <= QP_TH          ->  exitflag = 3   Threshold on objective value
+ *
+ * where QP and QD are primal respectively dual objective values.
+ *
+ * INPUT ARGUMENTS
+ *  get_col   function which returns pointer to the i-th column of H.
+ *  diag_H [float64_t n x 1] vector containing values on the diagonal of H.
+ *  f [float64_t n x 1] vector.
+ *  b [float64_t n x 1] vector of positive numbers.
+ *  I [uint16_T n x 1] vector containing numbers 1...m.
+ *  S [uint8_T n x 1] vector containing numbers 0 and 1.
+ *  x [float64_t n x 1] solution vector; must be feasible.
+ *  n [uint32_t 1 x 1] dimension of H.
+ *  MaxIter [uint32_t 1 x 1] max number of iterations.
+ *  TolAbs [float64_t 1 x 1] Absolute tolerance.
+ *  TolRel [float64_t 1 x 1] Relative tolerance.
+ *  QP_TH  [float64_t 1 x 1] Threshold on the primal value.
+ *  print_state  print function; if == NULL it is not called.
+ *
+ * RETURN VALUE
+ *  structure [libqp_state_T]
+ *  .QP [1 x 1] Primal objective value.
+ *  .QD [1 x 1] Dual objective value.
+ *  .nIter [1 x 1] Number of iterations.
+ *  .exitflag [1 x 1] Indicates which stopping condition was used:
+ *    -1  ... Not enough memory.
+ *     0  ... Maximal number of iteations reached: nIter >= MaxIter.
+ *     1  ... Relarive tolerance reached: QP-QD <= abs(QP)*TolRel
+ *     2  ... Absolute tolerance reached: QP-QD <= TolAbs
+ *     3  ... Objective value reached threshold: QP <= QP_TH.
+ *
+ * REFERENCE
+ *  The algorithm is described in:
+ *  V. Franc, V. Hlavac. A Novel Algorithm for Learning Support Vector Machines
+ *   with Structured Output Spaces. Research Report K333 22/06, CTU-CMP-2006-04.
+ *   May, 2006. ftp://cmp.felk.cvut.cz/pub/cmp/articles/franc/Franc-TR-2006-04.ps
+ *
+ *
+ * This software is distributed under BSD 3-clause license (see LICENSE file).
+ *
+ * Authors: Vojtech Franc, Soeren Sonnenburg
+ *
+ *-------------------------------------------------------------------- */
+
+#include <shogun/lib/config.h>
+#include <math.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <limits.h>
+
+#include <shogun/lib/common.h>
+#include <shogun/lib/external/libqp.h>
+namespace shogun
+{
+
+libqp_state_T libqp_splx_solver(const float64_t* (*get_col)(uint32_t),
+                  float64_t *diag_H,
+                  float64_t *f,
+                  float64_t *b,
+                  uint32_t *Ivector,
+                  uint8_t *S,
+                  float64_t *x,
+                  uint32_t n,
+                  uint32_t MaxIter,
+                  float64_t TolAbs,
+                  float64_t TolRel,
+                  float64_t QP_TH,
+				  void (*print_state)(libqp_state_T state))
+{
+  float64_t *d;
+  float64_t *col_u, *col_v;
+  float64_t *x_neq;
+  float64_t tmp;
+  float64_t improv;
+  float64_t tmp_num;
+  float64_t tmp_den=0;
+  float64_t tau=0;
+  float64_t delta;
+  uint32_t *inx;
+  uint32_t *nk;
+  uint32_t m;
+  int32_t u=0;
+  int32_t v=0;
+  uint32_t k;
+  uint32_t i, j;
+  libqp_state_T state;
+
+
+  /* ------------------------------------------------------------
+    Initialization
+  ------------------------------------------------------------ */
+  state.nIter = 0;
+  state.QP = LIBQP_PLUS_INF;
+  state.QD = -LIBQP_PLUS_INF;
+  state.exitflag = 100;
+
+  inx=NULL;
+  nk=NULL;
+  d=NULL;
+  x_neq = NULL;
+
+  /* count number of constraints */
+  for( i=0, m=0; i < n; i++ )
+    m = LIBQP_MAX(m,Ivector[i]);
+
+  /* auxciliary variables for tranforming equalities to inequalities */
+  x_neq = (float64_t*) LIBQP_CALLOC(m, float64_t);
+  if( x_neq == NULL )
+  {
+	  state.exitflag=-1;
+	  goto cleanup;
+  }
+
+  /* inx is translation table between variable index i and its contraint */
+  inx = (uint32_t*) LIBQP_CALLOC(m*n, uint32_t);
+  if( inx == NULL )
+  {
+	  state.exitflag=-1;
+	  goto cleanup;
+  }
+
+  /* nk is the number of variables coupled by i-th linear constraint */
+  nk = (uint32_t*) LIBQP_CALLOC(m, uint32_t);
+  if( nk == NULL )
+  {
+	  state.exitflag=-1;
+	  goto cleanup;
+  }
+
+  /* setup auxciliary variables */
+  for( i=0; i < m; i++ )
+    x_neq[i] = b[i];
+
+
+  /* create inx and nk */
+  for( i=0; i < n; i++ ) {
+     k = Ivector[i]-1;
+     inx[LIBQP_INDEX(nk[k],k,n)] = i;
+     nk[k]++;
+
+     if(S[k] != 0)
+       x_neq[k] -= x[i];
+  }
+
+  /* d = H*x + f is gradient*/
+  d = (float64_t*) LIBQP_CALLOC(n, float64_t);
+  if( d == NULL )
+  {
+	  state.exitflag=-1;
+	  goto cleanup;
+  }
+
+  /* compute gradient */
+  for( i=0; i < n; i++ )
+  {
+    d[i] += f[i];
+    if( x[i] > 0 ) {
+      col_u = (float64_t*)get_col(i);
+      for( j=0; j < n; j++ ) {
+          d[j] += col_u[j]*x[i];
+      }
+    }
+  }
+
+  /* compute state.QP = 0.5*x'*(f+d);
+             state.QD = 0.5*x'*(f-d); */
+  for( i=0, state.QP = 0, state.QD=0; i < n; i++)
+  {
+    state.QP += x[i]*(f[i]+d[i]);
+    state.QD += x[i]*(f[i]-d[i]);
+  }
+  state.QP = 0.5*state.QP;
+  state.QD = 0.5*state.QD;
+
+  for( i=0; i < m; i++ )
+  {
+    for( j=0, tmp = LIBQP_PLUS_INF; j < nk[i]; j++ )
+      tmp = LIBQP_MIN(tmp, d[inx[LIBQP_INDEX(j,i,n)]]);
+
+    if(S[i] == 0)
+      state.QD += b[i]*tmp;
+    else
+      state.QD += b[i]*LIBQP_MIN(tmp,0);
+  }
+
+  /* print initial state */
+  if( print_state != NULL)
+    print_state( state );
+
+  /* ------------------------------------------------------------
+    Main optimization loop
+  ------------------------------------------------------------ */
+  while( state.exitflag == 100 )
+  {
+    state.nIter ++;
+
+    /* go over blocks of variables coupled by lin. constraint */
+    for( k=0; k < m; k++ )
+    {
+
+      /* compute u = argmin_{i in I_k} d[i]
+             delta =  sum_{i in I_k} x[i]*d[i] - b*min_{i in I_k} */
+      for( j=0, tmp = LIBQP_PLUS_INF, delta = 0; j < nk[k]; j++ )
+      {
+        i = inx[LIBQP_INDEX(j,k,n)];
+        delta += x[i]*d[i];
+        if( tmp > d[i] ) {
+          tmp = d[i];
+          u = i;
+        }
+      }
+
+      if(S[k] != 0 && d[u] > 0)
+        u = -1;
+      else
+        delta -= b[k]*d[u];
+
+      /* if satisfied then k-th block of variables needs update */
+      if( delta > TolAbs/m && delta > TolRel*LIBQP_ABS(state.QP)/m)
+      {
+        /* for fixed u select v = argmax_{i in I_k} Improvement(i) */
+        if( u != -1 )
+        {
+          col_u = (float64_t*)get_col(u);
+          improv = -LIBQP_PLUS_INF;
+          for( j=0; j < nk[k]; j++ )
+          {
+            i = inx[LIBQP_INDEX(j,k,n)];
+
+            if(x[i] > 0 && i != uint32_t(u))
+            {
+              tmp_num = x[i]*(d[i] - d[u]);
+              tmp_den = x[i]*x[i]*(diag_H[u] - 2*col_u[i] + diag_H[i]);
+              if( tmp_den > 0 )
+              {
+                if( tmp_num < tmp_den )
+                  tmp = tmp_num*tmp_num / tmp_den;
+                else
+                  tmp = tmp_num - 0.5 * tmp_den;
+
+                if( tmp > improv )
+                {
+                  improv = tmp;
+                  tau = LIBQP_MIN(1,tmp_num/tmp_den);
+                  v = i;
+                }
+              }
+            }
+          }
+
+          /* check if virtual variable can be for updated */
+          if(x_neq[k] > 0 && S[k] != 0)
+          {
+            tmp_num = -x_neq[k]*d[u];
+            tmp_den = x_neq[k]*x_neq[k]*diag_H[u];
+            if( tmp_den > 0 )
+            {
+              if( tmp_num < tmp_den )
+                tmp = tmp_num*tmp_num / tmp_den;
+              else
+                tmp = tmp_num - 0.5 * tmp_den;
+
+              if( tmp > improv )
+              {
+                improv = tmp;
+                tau = LIBQP_MIN(1,tmp_num/tmp_den);
+                v = -1;
+              }
+            }
+          }
+
+          /* minimize objective w.r.t variable u and v */
+          if(v != -1)
+          {
+            tmp = x[v]*tau;
+            x[u] += tmp;
+            x[v] -= tmp;
+
+            /* update d = H*x + f */
+            col_v = (float64_t*)get_col(v);
+            for(i = 0; i < n; i++ )
+              d[i] += tmp*(col_u[i]-col_v[i]);
+          }
+          else
+          {
+            tmp = x_neq[k]*tau;
+            x[u] += tmp;
+            x_neq[k] -= tmp;
+
+            /* update d = H*x + f */
+            for(i = 0; i < n; i++ )
+              d[i] += tmp*col_u[i];
+          }
+        }
+        else
+        {
+          improv = -LIBQP_PLUS_INF;
+          for( j=0; j < nk[k]; j++ )
+          {
+            i = inx[LIBQP_INDEX(j,k,n)];
+
+            if(x[i] > 0)
+            {
+              tmp_num = x[i]*d[i];
+              tmp_den = x[i]*x[i]*diag_H[i];
+              if( tmp_den > 0 )
+              {
+                if( tmp_num < tmp_den )
+                  tmp = tmp_num*tmp_num / tmp_den;
+                else
+                  tmp = tmp_num - 0.5 * tmp_den;
+
+                if( tmp > improv )
+                {
+                  improv = tmp;
+                  tau = LIBQP_MIN(1,tmp_num/tmp_den);
+                  v = i;
+                }
+              }
+            }
+          }
+
+          tmp = x[v]*tau;
+          x_neq[k] += tmp;
+          x[v] -= tmp;
+
+          /* update d = H*x + f */
+          col_v = (float64_t*)get_col(v);
+          for(i = 0; i < n; i++ )
+            d[i] -= tmp*col_v[i];
+        }
+
+        /* update objective value */
+        state.QP = state.QP - improv;
+      }
+    }
+
+    /* Compute primal and dual objectives */
+    for( i=0, state.QP = 0, state.QD=0; i < n; i++)
+    {
+       state.QP += x[i]*(f[i]+d[i]);
+       state.QD += x[i]*(f[i]-d[i]);
+    }
+    state.QP = 0.5*state.QP;
+    state.QD = 0.5*state.QD;
+
+    for( k=0; k < m; k++ )
+    {
+      for( j=0,tmp = LIBQP_PLUS_INF; j < nk[k]; j++ ) {
+        i = inx[LIBQP_INDEX(j,k,n)];
+        tmp = LIBQP_MIN(tmp, d[i]);
+      }
+
+      if(S[k] == 0)
+        state.QD += b[k]*tmp;
+      else
+        state.QD += b[k]*LIBQP_MIN(tmp,0);
+    }
+
+    /* print state */
+    if( print_state != NULL)
+      print_state( state );
+
+    /* check stopping conditions */
+    if(state.QP-state.QD <= LIBQP_ABS(state.QP)*TolRel ) state.exitflag = 1;
+    else if( state.QP-state.QD <= TolAbs ) state.exitflag = 2;
+    else if( state.QP <= QP_TH ) state.exitflag = 3;
+    else if( state.nIter >= MaxIter) state.exitflag = 0;
+  }
+
+  /*----------------------------------------------------------
+    Clean up
+  ---------------------------------------------------------- */
+cleanup:
+  LIBQP_FREE( d );
+  LIBQP_FREE( inx );
+  LIBQP_FREE( nk );
+  LIBQP_FREE( x_neq );
+
+  return( state );
+}
+}
diff --git a/src/shogun/multiclass/MulticlassOCAS.cpp b/src/shogun/multiclass/MulticlassOCAS.cpp
new file mode 100644
index 00000000000..219b23c804e
--- /dev/null
+++ b/src/shogun/multiclass/MulticlassOCAS.cpp
@@ -0,0 +1,265 @@
+/*
+ * This software is distributed under BSD 3-clause license (see LICENSE file).
+ *
+ * Authors: Vojtech Franc, Soeren Sonnenburg, Sergey Lisitsyn
+ */
+
+#include <shogun/multiclass/MulticlassOCAS.h>
+
+#include <shogun/multiclass/MulticlassOneVsRestStrategy.h>
+#include <shogun/mathematics/Math.h>
+#include <shogun/mathematics/linalg/LinalgNamespace.h>
+#include <shogun/labels/MulticlassLabels.h>
+
+using namespace shogun;
+
+struct mocas_data
+{
+	CDotFeatures* features;
+	float64_t* W;
+	float64_t* oldW;
+	float64_t* full_A;
+	float64_t* data_y;
+	float64_t* output_values;
+	uint32_t nY;
+	uint32_t nData;
+	uint32_t nDim;
+	float64_t* new_a;
+};
+
+CMulticlassOCAS::CMulticlassOCAS() :
+	CLinearMulticlassMachine()
+{
+	register_parameters();
+	set_C(1.0);
+	set_epsilon(1e-2);
+	set_max_iter(1000000);
+	set_method(1);
+	set_buf_size(5000);
+}
+
+CMulticlassOCAS::CMulticlassOCAS(float64_t C, CDotFeatures* train_features, CLabels* train_labels) :
+	CLinearMulticlassMachine(new CMulticlassOneVsRestStrategy(), train_features, NULL, train_labels), m_C(C)
+{
+	register_parameters();
+	set_epsilon(1e-2);
+	set_max_iter(1000000);
+	set_method(1);
+	set_buf_size(5000);
+}
+
+void CMulticlassOCAS::register_parameters()
+{
+	SG_ADD(&m_C, "m_C", "regularization constant", MS_AVAILABLE);
+	SG_ADD(&m_epsilon, "m_epsilon", "solver relative tolerance", MS_NOT_AVAILABLE);
+	SG_ADD(&m_max_iter, "m_max_iter", "max number of iterations", MS_NOT_AVAILABLE);
+	SG_ADD(&m_method, "m_method", "used solver method", MS_NOT_AVAILABLE);
+	SG_ADD(&m_buf_size, "m_buf_size", "buffer size", MS_NOT_AVAILABLE);
+}
+
+CMulticlassOCAS::~CMulticlassOCAS()
+{
+}
+
+bool CMulticlassOCAS::train_machine(CFeatures* data)
+{
+	if (data)
+		set_features((CDotFeatures*)data);
+
+	ASSERT(m_features)
+	ASSERT(m_labels)
+	ASSERT(m_multiclass_strategy)
+
+	int32_t num_vectors = m_features->get_num_vectors();
+	int32_t num_classes = m_multiclass_strategy->get_num_classes();
+	int32_t num_features = m_features->get_dim_feature_space();
+
+	float64_t C = m_C;
+	SGVector<float64_t> labels = ((CMulticlassLabels*) m_labels)->get_labels();
+	uint32_t nY = num_classes;
+	uint32_t nData = num_vectors;
+	float64_t TolRel = m_epsilon;
+	float64_t TolAbs = 0.0;
+	float64_t QPBound = 0.0;
+	float64_t MaxTime = m_max_train_time;
+	uint32_t BufSize = m_buf_size;
+	uint8_t Method = m_method;
+
+	mocas_data user_data;
+	user_data.features = m_features;
+	user_data.W = SG_CALLOC(float64_t, (int64_t)num_features*num_classes);
+	user_data.oldW = SG_CALLOC(float64_t, (int64_t)num_features*num_classes);
+	user_data.new_a = SG_CALLOC(float64_t, (int64_t)num_features*num_classes);
+	user_data.full_A = SG_CALLOC(float64_t, (int64_t)num_features*num_classes*m_buf_size);
+	user_data.output_values = SG_CALLOC(float64_t, num_vectors);
+	user_data.data_y = labels.vector;
+	user_data.nY = num_classes;
+	user_data.nDim = num_features;
+	user_data.nData = num_vectors;
+
+	ocas_return_value_T value =
+	msvm_ocas_solver(C, labels.vector, nY, nData, TolRel, TolAbs,
+	                 QPBound, MaxTime, BufSize, Method,
+	                 &CMulticlassOCAS::msvm_full_compute_W,
+	                 &CMulticlassOCAS::msvm_update_W,
+	                 &CMulticlassOCAS::msvm_full_add_new_cut,
+	                 &CMulticlassOCAS::msvm_full_compute_output,
+	                 &CMulticlassOCAS::msvm_sort_data,
+	                 &CMulticlassOCAS::msvm_print,
+	                 &user_data);
+
+	SG_DEBUG("Number of iterations [nIter] = %d \n",value.nIter)
+	SG_DEBUG("Number of cutting planes [nCutPlanes] = %d \n",value.nCutPlanes)
+	SG_DEBUG("Number of non-zero alphas [nNZAlpha] = %d \n",value.nNZAlpha)
+	SG_DEBUG("Number of training errors [trn_err] = %d \n",value.trn_err)
+	SG_DEBUG("Primal objective value [Q_P] = %f \n",value.Q_P)
+	SG_DEBUG("Dual objective value [Q_D] = %f \n",value.Q_D)
+	SG_DEBUG("Output time [output_time] = %f \n",value.output_time)
+	SG_DEBUG("Sort time [sort_time] = %f \n",value.sort_time)
+	SG_DEBUG("Add time [add_time] = %f \n",value.add_time)
+	SG_DEBUG("W time [w_time] = %f \n",value.w_time)
+	SG_DEBUG("QP solver time [qp_solver_time] = %f \n",value.qp_solver_time)
+	SG_DEBUG("OCAS time [ocas_time] = %f \n",value.ocas_time)
+	SG_DEBUG("Print time [print_time] = %f \n",value.print_time)
+	SG_DEBUG("QP exit flag [qp_exitflag] = %d \n",value.qp_exitflag)
+	SG_DEBUG("Exit flag [exitflag] = %d \n",value.exitflag)
+
+	m_machines->reset_array();
+	for (int32_t i=0; i<num_classes; i++)
+	{
+		CLinearMachine* machine = new CLinearMachine();
+		machine->set_w(SGVector<float64_t>(&user_data.W[i*num_features],num_features,false).clone());
+
+		m_machines->push_back(machine);
+	}
+
+	SG_FREE(user_data.W);
+	SG_FREE(user_data.oldW);
+	SG_FREE(user_data.new_a);
+	SG_FREE(user_data.full_A);
+	SG_FREE(user_data.output_values);
+
+	return true;
+}
+
+float64_t CMulticlassOCAS::msvm_update_W(float64_t t, void* user_data)
+{
+	float64_t* oldW = ((mocas_data*)user_data)->oldW;
+	uint32_t nY = ((mocas_data*)user_data)->nY;
+	uint32_t nDim = ((mocas_data*)user_data)->nDim;
+	SGVector<float64_t> W(((mocas_data*)user_data)->W, nDim*nY, false);
+
+	for(uint32_t j=0; j < nY*nDim; j++)
+		W[j] = oldW[j]*(1-t) + t*W[j];
+
+	float64_t sq_norm_W = linalg::dot(W,W);
+
+	return sq_norm_W;
+}
+
+void CMulticlassOCAS::msvm_full_compute_W(float64_t *sq_norm_W, float64_t *dp_WoldW,
+                                          float64_t *alpha, uint32_t nSel, void* user_data)
+{
+	float64_t* full_A = ((mocas_data*)user_data)->full_A;
+	uint32_t nY = ((mocas_data*)user_data)->nY;
+	uint32_t nDim = ((mocas_data*)user_data)->nDim;
+	SGVector<float64_t> W(((mocas_data*)user_data)->W, nDim*nY, false);
+	SGVector<float64_t> oldW(((mocas_data*)user_data)->oldW, nDim*nY, false);
+
+	uint32_t i,j;
+
+	sg_memcpy(oldW.vector, W.vector, sizeof(float64_t)*nDim*nY);
+	linalg::zero(W);
+
+	for(i=0; i<nSel; i++)
+	{
+		if(alpha[i] > 0)
+		{
+			for(j=0; j<nDim*nY; j++)
+				W[j] += alpha[i]*full_A[LIBOCAS_INDEX(j,i,nDim*nY)];
+		}
+	}
+
+	*sq_norm_W = linalg::dot(W,W);
+	*dp_WoldW = linalg::dot(W,oldW);
+
+	return;
+}
+
+int CMulticlassOCAS::msvm_full_add_new_cut(float64_t *new_col_H, uint32_t *new_cut,
+                                           uint32_t nSel, void* user_data)
+{
+	float64_t* full_A = ((mocas_data*)user_data)->full_A;
+	float64_t* data_y = ((mocas_data*)user_data)->data_y;
+	uint32_t nY = ((mocas_data*)user_data)->nY;
+	uint32_t nDim = ((mocas_data*)user_data)->nDim;
+	uint32_t nData = ((mocas_data*)user_data)->nData;
+	SGVector<float64_t> new_a(((mocas_data*)user_data)->new_a, nDim*nY, false);
+	CDotFeatures* features = ((mocas_data*)user_data)->features;
+
+	float64_t sq_norm_a;
+	uint32_t i, j, y, y2;
+
+	linalg::zero(new_a);
+
+	for(i=0; i < nData; i++)
+	{
+		y = (uint32_t)(data_y[i]);
+		y2 = (uint32_t)new_cut[i];
+		if(y2 != y)
+		{
+			features->add_to_dense_vec(1.0,i,&new_a[nDim*y],nDim);
+			features->add_to_dense_vec(-1.0,i,&new_a[nDim*y2],nDim);
+		}
+	}
+
+	// compute new_a'*new_a and insert new_a to the last column of full_A
+	sq_norm_a = linalg::dot(new_a,new_a);
+	for(j=0; j < nDim*nY; j++ )
+		full_A[LIBOCAS_INDEX(j,nSel,nDim*nY)] = new_a[j];
+
+	new_col_H[nSel] = sq_norm_a;
+	for(i=0; i < nSel; i++)
+	{
+		float64_t tmp = 0;
+
+		for(j=0; j < nDim*nY; j++ )
+			tmp += new_a[j]*full_A[LIBOCAS_INDEX(j,i,nDim*nY)];
+
+		new_col_H[i] = tmp;
+	}
+
+	return 0;
+}
+
+int CMulticlassOCAS::msvm_full_compute_output(float64_t *output, void* user_data)
+{
+	float64_t* W = ((mocas_data*)user_data)->W;
+	uint32_t nY = ((mocas_data*)user_data)->nY;
+	uint32_t nDim = ((mocas_data*)user_data)->nDim;
+	uint32_t nData = ((mocas_data*)user_data)->nData;
+	float64_t* output_values = ((mocas_data*)user_data)->output_values;
+	CDotFeatures* features = ((mocas_data*)user_data)->features;
+
+	uint32_t i, y;
+
+	for(y=0; y<nY; y++)
+	{
+		features->dense_dot_range(output_values,0,nData,NULL,&W[nDim*y],nDim,0.0);
+		for (i=0; i<nData; i++)
+			output[LIBOCAS_INDEX(y,i,nY)] = output_values[i];
+	}
+
+	return 0;
+}
+
+int CMulticlassOCAS::msvm_sort_data(float64_t* vals, float64_t* data, uint32_t size)
+{
+	CMath::qsort_index(vals, data, size);
+	return 0;
+}
+
+void CMulticlassOCAS::msvm_print(ocas_return_value_T value)
+{
+}
+
diff --git a/src/shogun/multiclass/MulticlassOCAS.h b/src/shogun/multiclass/MulticlassOCAS.h
new file mode 100644
index 00000000000..d177c4b502c
--- /dev/null
+++ b/src/shogun/multiclass/MulticlassOCAS.h
@@ -0,0 +1,157 @@
+#ifndef _MULTICLASSOCAS_H___
+#define _MULTICLASSOCAS_H___
+
+/*
+ * This software is distributed under BSD 3-clause license (see LICENSE file).
+ *
+ * Authors: Sergey Lisitsyn
+ */
+
+#include <shogun/lib/config.h>
+#include <shogun/lib/common.h>
+#include <shogun/features/DotFeatures.h>
+#include <shogun/lib/external/libocas.h>
+#include <shogun/machine/LinearMulticlassMachine.h>
+
+namespace shogun
+{
+
+/** @brief multiclass OCAS wrapper */
+class CMulticlassOCAS : public CLinearMulticlassMachine
+{
+	public:
+		MACHINE_PROBLEM_TYPE(PT_MULTICLASS)
+
+		/** default constructor  */
+		CMulticlassOCAS();
+
+		/** standard constructor
+		 * @param C C regularication constant value
+		 * @param features features
+		 * @param labs labels
+		 */
+		CMulticlassOCAS(float64_t C, CDotFeatures* features, CLabels* labs);
+
+		/** destructor */
+		virtual ~CMulticlassOCAS();
+
+		/** get name */
+		virtual const char* get_name() const
+		{
+			return "MulticlassOCAS";
+		}
+
+		/** set C
+		 * @param C C value
+		 */
+		inline void set_C(float64_t C)
+		{
+			ASSERT(C>0)
+			m_C = C;
+		}
+		/** get C
+		 * @return C value
+		 */
+		inline float64_t get_C() const { return m_C; }
+
+		/** set epsilon
+		 * @param epsilon epsilon value
+		 */
+		inline void set_epsilon(float64_t epsilon)
+		{
+			ASSERT(epsilon>0)
+			m_epsilon = epsilon;
+		}
+		/** get epsilon
+		 * @return epsilon value
+		 */
+		inline float64_t get_epsilon() const { return m_epsilon; }
+
+		/** set max iter
+		 * @param max_iter max iter value
+		 */
+		inline void set_max_iter(int32_t max_iter)
+		{
+			ASSERT(max_iter>0)
+			m_max_iter = max_iter;
+		}
+		/** get max iter
+		 * @return max iter value
+		 */
+		inline int32_t get_max_iter() const { return m_max_iter; }
+
+		/** set method
+		 * @param method method value
+		 */
+		inline void set_method(int32_t method)
+		{
+			ASSERT(method==0 || method==1)
+			m_method = method;
+		}
+		/** get method
+		 * @return method value
+		 */
+		inline int32_t get_method() const { return m_method; }
+
+		/** set buf size
+		 * @param buf_size buf size value
+		 */
+		inline void set_buf_size(int32_t buf_size)
+		{
+			ASSERT(buf_size>0)
+			m_buf_size = buf_size;
+		}
+		/** get buf size
+		 * @return buf_size value
+		 */
+		inline int32_t get_buf_size() const { return m_buf_size; }
+
+protected:
+
+		/** train machine */
+		virtual bool train_machine(CFeatures* data = NULL);
+
+		/** update W */
+		static float64_t msvm_update_W(float64_t t, void* user_data);
+
+		/** full compute W */
+		static void msvm_full_compute_W(float64_t *sq_norm_W, float64_t *dp_WoldW,
+		                                float64_t *alpha, uint32_t nSel, void* user_data);
+
+		/** full add new cut */
+		static int msvm_full_add_new_cut(float64_t *new_col_H, uint32_t *new_cut,
+		                                 uint32_t nSel, void* user_data);
+
+		/** full compute output */
+		static int msvm_full_compute_output(float64_t *output, void* user_data);
+
+		/** sort */
+		static int msvm_sort_data(float64_t* vals, float64_t* data, uint32_t size);
+
+		/** print nothing */
+		static void msvm_print(ocas_return_value_T value);
+
+private:
+
+		/** register parameters */
+		void register_parameters();
+
+protected:
+
+		/** regularization constant for each machine */
+		float64_t m_C;
+
+		/** tolerance */
+		float64_t m_epsilon;
+
+		/** max number of iterations */
+		int32_t m_max_iter;
+
+		/** method */
+		int32_t m_method;
+
+		/** buffer size */
+		int32_t m_buf_size;
+};
+}
+#endif
diff --git a/tests/unit/classifier/svm/SVMOcas_unittest.cc b/tests/unit/classifier/svm/SVMOcas_unittest.cc
index 25505062d76..8cfc5a58ead 100644
--- a/tests/unit/classifier/svm/SVMOcas_unittest.cc
+++ b/tests/unit/classifier/svm/SVMOcas_unittest.cc
@@ -1,20 +1,17 @@
+#include <gtest/gtest.h>
+
 #include <shogun/lib/config.h>
-#ifdef USE_GPL_SHOGUN
 #include <shogun/classifier/svm/SVMOcas.h>
-#endif // USE_GPL_SHOGUN
 #include <shogun/evaluation/ContingencyTableEvaluation.h>
 #include <shogun/features/DataGenerator.h>
 #include <shogun/features/DenseFeatures.h>
 
-#include <gtest/gtest.h>
-
 #include "environments/LinearTestEnvironment.h"
 
 using namespace shogun;
 
 extern LinearTestEnvironment* linear_test_env;
 
-#ifdef USE_GPL_SHOGUN
 #ifdef HAVE_LAPACK
 TEST(SVMOcasTest,train)
 {
@@ -44,5 +41,3 @@ TEST(SVMOcasTest,train)
 	SG_UNREF(pred);
 }
 #endif // HAVE_LAPACK
-#endif //USE_GPL_SHOGUN
-
diff --git a/tests/unit/latent/LatentSVM_unittest.cc b/tests/unit/latent/LatentSVM_unittest.cc
index d23c838215b..ec040e4248c 100644
--- a/tests/unit/latent/LatentSVM_unittest.cc
+++ b/tests/unit/latent/LatentSVM_unittest.cc
@@ -1,7 +1,6 @@
 #include "MockLatentModel.h"
 #include <shogun/lib/config.h>
 
-#ifdef USE_GPL_SHOGUN
 #include <shogun/latent/LatentSVM.h>
 
 using namespace shogun;
@@ -107,4 +106,3 @@ TEST(LatentSVM, apply)
 	SG_UNREF(lsvm);
 	SG_UNREF(dense_feats);
 }
-#endif //USE_GPL_SHOGUN
diff --git a/tests/unit/multiclass/MulticlassOCAS_unittest.cc b/tests/unit/multiclass/MulticlassOCAS_unittest.cc
index 805086b4d52..0075aa8fcc3 100644
--- a/tests/unit/multiclass/MulticlassOCAS_unittest.cc
+++ b/tests/unit/multiclass/MulticlassOCAS_unittest.cc
@@ -1,9 +1,9 @@
+#include <gtest/gtest.h>
+
 #include "environments/MultiLabelTestEnvironment.h"
 #include <shogun/evaluation/MulticlassAccuracy.h>
 #include <shogun/features/DataGenerator.h>
 #include <shogun/features/DenseFeatures.h>
-#include <gtest/gtest.h>
-#ifdef USE_GPL_SHOGUN
 #include <shogun/multiclass/MulticlassOCAS.h>
 
 using namespace shogun;
@@ -36,4 +36,3 @@ TEST(MulticlassOCASTest,train)
   SG_UNREF(pred);
 }
 #endif // HAVE_LAPACK
-#endif //USE_GPL_SHOGUN