Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added Documentation regarding issue #1878 #1961

Closed
wants to merge 20 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion data
Submodule data updated from 6615cf to 464bbd
1,263 changes: 1,263 additions & 0 deletions doc/ipython-notebooks/pca/pca_notebook.ipynb

Large diffs are not rendered by default.

33 changes: 20 additions & 13 deletions src/shogun/features/RandomKitchenSinksDotFeatures.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,25 +16,32 @@
namespace shogun
{

/** @brief class that implements the Random Kitchen Sinks for the DotFeatures
/** @brief class that implements the Random Kitchen Sinks (RKS) for the DotFeatures
* as mentioned in http://books.nips.cc/papers/files/nips21/NIPS2008_0885.pdf.
*
* The Random Kitchen Sinks algorithm expects:
* a dataset to work on
* a function phi such that |phi(x; a)| <= 1, the a's are the function parameters
* a probability distrubution p, from which to draw the a's
* the number of samples K to draw from p.
* RKS input:
* - a dataset $\{x_i, y_i\}_{i=1,\dots,m}$ of $m$ points to work on
* - $\phi(x; w)$: a bounded feature function s.t. $|\phi(x; w)| \leq 1$, where $w$ is the function parameter
* - $p(w)$: a probability distribution function, from which to draw the $w$
* - $K$: the number of samples to draw from $p(w)$
* - $C$: a scalar, which is chosen to be large enough in practice.
*
* Then:
* it draws K a's from p
* it computes for each vector in the dataset
* Zi = [phi(Xi;a0), ..., phi(Xi;aK)]
* and then solves the empirical risk minimization problem for all Zi, either
* through least squares or through a linear SVM.
* RKS output:
* A function $\hat{f}(x) = \sum_{k=1}^{K} \phi(x; w_k)\alpha_k$
* 1. Draw $w_1,\dots,w_K$ iid from $p(w)$
* 2. Featurize the input: $z_i = [\phi(x_i; w_1),\dots,\phi(x_i; w_K)]^{\top}$
* 3. With $w$ fixed, solve the empirical risk minimization problem:
* \begin{equation}
* \underset{\alpha \in \mathbf{R}^K}{\text{minimize}} \quad \frac{1}{m}\sum_{i=1}^{m} c(\alpha^{\top} z_i, y_i)
* \end{equation}
* \begin{equation}
* \text{s.t.} \quad \|\alpha\|_{\infty} \leq C/K.
* \end{equation}
* for vector $\alpha$, either through least squares when $c(y', y)$ is the quadratic loss or through a linear SVM when $c(y', y)$ is the hinge loss.
*
* This class implements the vector transformation on-the-fly whenever it is needed.
* In order for it to work, the class expects the user to implement a subclass of
* CRKSFunctions and implement in there the functions phi and p and then pass an
* CRKSFunctions and implement in there the functions $\phi$ and $p$ and then pass an
* instantiated object of that class to the constructor.
*
* Further useful resources include:
Expand Down
3 changes: 2 additions & 1 deletion src/shogun/machine/gp/LikelihoodModel.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ enum ELikelihoodModelType
LT_GAUSSIAN=10,
LT_STUDENTST=20,
LT_LOGIT=30,
LT_PROBIT=40
LT_PROBIT=40,
LT_SOFTMAX=50
};

/** @brief The Likelihood model base class.
Expand Down
194 changes: 194 additions & 0 deletions src/shogun/machine/gp/SoftMaxLikelihood.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
/*
* Copyright (c) The Shogun Machine Learning Toolbox
* Written (w) 2014 Parijat Mazumdar
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* The views and conclusions contained in the software and documentation are those
* of the authors and should not be interpreted as representing official policies,
* either expressed or implied, of the Shogun Development Team.
*/

#include <shogun/machine/gp/SoftMaxLikelihood.h>

#ifdef HAVE_EIGEN3

#include <shogun/labels/MulticlassLabels.h>
#include <shogun/mathematics/eigen3.h>

using namespace shogun;
using namespace Eigen;

// Default constructor: the softmax likelihood keeps no state of its own,
// so only the base-class constructor runs.
CSoftMaxLikelihood::CSoftMaxLikelihood() : CLikelihoodModel()
{
}

// Destructor: nothing to release; all members are owned by the base class.
CSoftMaxLikelihood::~CSoftMaxLikelihood()
{
}

/** Returns the log-likelihood log p(y_i|f_i) of each label under the softmax
 * model: log p(y_i|f_i) = f_{i,y_i} - log(sum_c exp(f_{i,c})).
 *
 * @param lab multiclass labels (one of 0..C-1 per sample), must not be NULL
 * @param func latent function values, column-major, length = num_labels * C
 * @return vector of per-sample log-probabilities (length = num_labels)
 */
SGVector<float64_t> CSoftMaxLikelihood::get_log_probability_f(const CLabels* lab,
SGVector<float64_t> func) const
{
REQUIRE(lab, "Labels are required (lab should not be NULL)\n")
REQUIRE(lab->get_label_type()==LT_MULTICLASS,
"Labels must be type of CMulticlassLabels\n")

SGVector<int32_t> labels=((CMulticlassLabels*) lab)->get_int_labels();
// each label must index a valid class column: 0 <= labels[i] < C
for (int32_t i=0;i<labels.vlen;i++)
REQUIRE(((labels[i]>-1)&&(labels[i]<func.vlen/labels.vlen)),
"Labels must be between 0 and C-1 (ie C=%d here). Currently labels[%d] "
"is %d\n",func.vlen/labels.vlen,i,labels[i]);

// labels.vlen=num_rows func.vlen/num_rows=num_cols
// NOTE: eigen_f aliases func.vector, so the in-place shift below mutates
// the caller's buffer and is undone afterwards.
Map<MatrixXd> eigen_f(func.vector,labels.vlen,func.vlen/labels.vlen);

// log_sum_exp trick: subtract the row-wise max before exponentiating to
// avoid overflow, then add it back to the log of the sum.
VectorXd max_coeff=eigen_f.rowwise().maxCoeff();
eigen_f=eigen_f-max_coeff*MatrixXd::Ones(1,eigen_f.cols());
VectorXd log_sum_exp=((eigen_f.array().exp()).rowwise().sum()).array().log();
log_sum_exp=log_sum_exp+max_coeff;

// restore original matrix (undo the in-place shift on func.vector)
eigen_f=eigen_f+max_coeff*MatrixXd::Ones(1,eigen_f.cols());

SGVector<float64_t> ret=SGVector<float64_t>(labels.vlen);
Map<VectorXd> eigen_ret(ret.vector,ret.vlen);

// pick the latent value of the true class for each sample
for (int32_t i=0;i<labels.vlen;i++)
eigen_ret(i)=eigen_f(i,labels[i]);

// log p(y|f) = f_y - log sum_c exp(f_c)
eigen_ret=eigen_ret-log_sum_exp;

return ret;
}

/** Dispatches to the i-th derivative of the log-likelihood wrt the latent
 * function values f.
 *
 * @param lab multiclass labels, must not be NULL
 * @param func latent function values, length = num_labels * C
 * @param i derivative order, must be 1, 2 or 3
 * @return flattened derivative values (layout depends on the order i)
 */
SGVector<float64_t> CSoftMaxLikelihood::get_log_probability_derivative_f(
const CLabels* lab, SGVector<float64_t> func, index_t i) const
{
REQUIRE(lab, "Labels are required (lab should not be NULL)\n")
// reject invalid orders instead of silently computing the 3rd derivative
REQUIRE(i>=1 && i<=3, "Index of derivative (%d) must be 1, 2 or 3\n", i)

int32_t num_rows=lab->get_num_labels();
int32_t num_cols=func.vlen/num_rows;
// wrap func as a num_rows x num_cols matrix without copying (ref=false)
SGMatrix<float64_t> f=SGMatrix<float64_t>(func.vector,num_rows,num_cols,false);

if (i==1)
return get_log_probability_derivative1_f(lab,f);
else if (i==2)
return get_log_probability_derivative2_f(f);
else
return get_log_probability_derivative3_f(f);
}

/** First derivative of the log-softmax likelihood wrt f.
 *
 * For each sample i and class c: d log p(y_i|f_i) / d f_{i,c} = [y_i==c] - s_{i,c},
 * where s = softmax(f) row-wise.
 *
 * @param lab multiclass labels (0..C-1), must not be NULL
 * @param func latent function values as a num_labels x C matrix
 * @return flattened num_rows x num_cols matrix of derivatives
 */
SGVector<float64_t> CSoftMaxLikelihood::get_log_probability_derivative1_f(
const CLabels* lab, SGMatrix<float64_t> func) const
{
REQUIRE(lab, "Labels are required (lab should not be NULL)\n")
REQUIRE(lab->get_label_type()==LT_MULTICLASS,
"Labels must be type of CMulticlassLabels\n")
REQUIRE(lab->get_num_labels()==func.num_rows, "Number of labels must match "
"number of vectors in function\n")

SGVector<int32_t> labels=((CMulticlassLabels*) lab)->get_int_labels();
// each label must index a valid class column: 0 <= labels[i] < C
for (int32_t i=0;i<labels.vlen;i++)
REQUIRE(((labels[i]>-1)&&(labels[i]<func.num_cols)),
"Labels must be between 0 and C-1 (ie C=%d here). Currently labels[%d] "
"is %d\n",func.num_cols,i,labels[i]);

// work on a copy of func so the caller's matrix is untouched
SGVector<float64_t> ret=SGVector<float64_t>(func.num_rows*func.num_cols);
memcpy(ret.vector,func.matrix,func.num_rows*func.num_cols*sizeof(float64_t));

// row-wise softmax: exp(f) divided by the per-row sum
// NOTE(review): exp without the max-shift trick — may overflow for large f
Map<MatrixXd> eigen_ret(ret.vector,func.num_rows,func.num_cols);
eigen_ret=eigen_ret.array().exp();
eigen_ret=eigen_ret.cwiseQuotient(eigen_ret.rowwise().sum()*MatrixXd::Ones(1,func.num_cols));

// one-hot encoding of the labels
MatrixXd y=MatrixXd::Zero(func.num_rows,func.num_cols);

for (int32_t i=0;i<labels.vlen;i++)
y(i,labels[i])=1.;

// d log p / df = y - softmax(f)
eigen_ret=y-eigen_ret;

return ret;
}

// Second derivative of the log-softmax likelihood wrt f.
// For each sample i the C x C Hessian block is s s^T - diag(s), where
// s = softmax(f_i). The result is returned as (num_rows*C) x C, i.e. the
// per-sample blocks stacked vertically.
SGVector<float64_t> CSoftMaxLikelihood::get_log_probability_derivative2_f(SGMatrix<float64_t> func) const
{
SGVector<float64_t> ret=SGVector<float64_t>(func.num_rows*func.num_cols*func.num_cols);
Map<MatrixXd> eigen_ret(ret.vector,func.num_rows*func.num_cols,func.num_cols);

Map<MatrixXd> eigen_f(func.matrix,func.num_rows,func.num_cols);

// row-wise softmax of f
// NOTE(review): exp without a max-shift — may overflow for large f
MatrixXd f1= eigen_f.array().exp();
f1=f1.cwiseQuotient(f1.rowwise().sum()*MatrixXd::Ones(1,f1.cols()));

for (int32_t i=0;i<eigen_f.rows();i++)
{
// outer product s s^T for sample i
eigen_ret.block(i*eigen_f.cols(),0,eigen_f.cols(),eigen_f.cols())=
f1.transpose().col(i)*f1.row(i);
// the diagonal of s s^T is s_k^2, so sqrt recovers s_k itself
VectorXd D=eigen_ret.block(i*eigen_f.cols(),0,eigen_f.cols(),eigen_f.cols())
.diagonal().array().sqrt();
// subtract diag(s): block becomes s s^T - diag(s)
eigen_ret.block(i*eigen_f.cols(),0,eigen_f.cols(),eigen_f.cols())-=
MatrixXd(D.asDiagonal());
}

return ret;
}

// Third derivative of the log-softmax likelihood wrt f.
// For each sample i and class triple (c1,c2,c3), with s = softmax(f_i):
//   d3 log p / df_{c1} df_{c2} df_{c3} =
//     [c1==c2==c3]*s_{c1}
//     - [c1==c2]*s_{c1}*s_{c3} - [c1==c3]*s_{c1}*s_{c2} - [c2==c3]*s_{c1}*s_{c2}
//     + 2*s_{c1}*s_{c2}*s_{c3}
// Returned flattened with index i*C^3 + c1*C^2 + c2*C + c3.
SGVector<float64_t> CSoftMaxLikelihood::get_log_probability_derivative3_f(SGMatrix<float64_t>
func) const
{
SGVector<float64_t> ret=SGVector<float64_t>(CMath::pow(func.num_cols,3)*func.num_rows);

Map<MatrixXd> eigen_f(func.matrix,func.num_rows,func.num_cols);

// row-wise softmax of f
// NOTE(review): exp without a max-shift — may overflow for large f
MatrixXd f1= eigen_f.array().exp();
f1=f1.cwiseQuotient(f1.rowwise().sum()*MatrixXd::Ones(1,f1.cols()));

for (int32_t i=0;i<func.num_rows;i++)
{
for (int32_t c1=0;c1<func.num_cols;c1++)
{
for (int32_t c2=0;c2<func.num_cols;c2++)
{
for (int32_t c3=0;c3<func.num_cols;c3++)
{
// accumulate the Kronecker-delta terms of the formula above
float64_t sum_temp=0;
if ((c1==c2) && (c2==c3))
sum_temp+=f1(i,c1);
if (c1==c2)
sum_temp=sum_temp-f1(i,c1)*f1(i,c3);
if (c1==c3)
sum_temp=sum_temp-f1(i,c1)*f1(i,c2);
if (c2==c3)
// c2==c3 here, so f1(i,c2) stands for s_{c2}=s_{c3}
sum_temp=sum_temp-f1(i,c1)*f1(i,c2);
sum_temp+=2.0*f1(i,c1)*f1(i,c2)*f1(i,c3);

ret[i*CMath::pow(func.num_cols,3)+
c1*CMath::pow(func.num_cols,2)+c2*func.num_cols+c3]=sum_temp;
}
}
}
}

return ret;
}

#endif /* HAVE_EIGEN3 */
Loading