Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added Documentation regarding issue #1878 #1961

Closed
wants to merge 20 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion data
Submodule data updated from 6615cf to 464bbd
1,263 changes: 1,263 additions & 0 deletions doc/ipython-notebooks/pca/pca_notebook.ipynb

Large diffs are not rendered by default.

33 changes: 20 additions & 13 deletions src/shogun/features/RandomKitchenSinksDotFeatures.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,25 +16,32 @@
namespace shogun
{

/** @brief class that implements the Random Kitchen Sinks for the DotFeatures
/** @brief class that implements the Random Kitchen Sinks (RKS) for the DotFeatures
* as mentioned in http://books.nips.cc/papers/files/nips21/NIPS2008_0885.pdf.
*
* The Random Kitchen Sinks algorithm expects:
* a dataset to work on
* a function phi such that |phi(x; a)| <= 1, the a's are the function parameters
* a probability distrubution p, from which to draw the a's
* the number of samples K to draw from p.
* RKS input:
* - a dataset $\{x_i, y_i\}_{i=1,\dots,m}$ of $m$ points to work on
* - $\phi(x; w)$: a bounded feature function s.t. $|\phi(x; w)| \leq 1$, where $w$ is the function parameter
* - $p(w)$: a probability distribution function, from which to draw the $w$
* - $K$: the number of samples to draw from $p(w)$
* - $C$: a scalar, which is chosen to be large enough in practice.
*
* Then:
* it draws K a's from p
* it computes for each vector in the dataset
* Zi = [phi(Xi;a0), ..., phi(Xi;aK)]
* and then solves the empirical risk minimization problem for all Zi, either
* through least squares or through a linear SVM.
* RKS output:
* A function $\hat{f}(x) = \sum_{k=1}^{K} \phi(x; w_k)\alpha_k$
* 1. Draw $w_1,\dots,w_K$ iid from $p(w)$
* 2. Featurize the input: $z_i = [\phi(x_i; w_1),\dots,\phi(x_i; w_K)]^{\top}$
* 3. With $w$ fixed, solve the empirical risk minimization problem:
* \begin{equation}
* \underset{\alpha \in \mathbf{R}^K}{\text{minimize}} \quad \frac{1}{m}\sum_{i=1}^{m} c(\alpha^{\top} z_i, y_i)
* \end{equation}
* \begin{equation}
* \text{s.t.} \quad \|\alpha\|_{\infty} \leq C/K.
* \end{equation}
* for vector $\alpha$, either through least squares when $c(y', y)$ is the quadratic loss or through a linear SVM when $c(y', y)$ is the hinge loss.
*
* This class implements the vector transformation on-the-fly whenever it is needed.
* In order for it to work, the class expects the user to implement a subclass of
* CRKSFunctions and implement in there the functions phi and p and then pass an
* CRKSFunctions and implement in there the functions $\phi$ and $p$ and then pass an
* instantiated object of that class to the constructor.
*
* Further useful resources include:
Expand Down
3 changes: 2 additions & 1 deletion src/shogun/machine/gp/LikelihoodModel.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ enum ELikelihoodModelType
LT_GAUSSIAN=10,
LT_STUDENTST=20,
LT_LOGIT=30,
LT_PROBIT=40
LT_PROBIT=40,
LT_SOFTMAX=50
};

/** @brief The Likelihood model base class.
Expand Down
194 changes: 194 additions & 0 deletions src/shogun/machine/gp/SoftMaxLikelihood.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
/*
* Copyright (c) The Shogun Machine Learning Toolbox
* Written (w) 2014 Parijat Mazumdar
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* The views and conclusions contained in the software and documentation are those
* of the authors and should not be interpreted as representing official policies,
* either expressed or implied, of the Shogun Development Team.
*/

#include <shogun/machine/gp/SoftMaxLikelihood.h>

#ifdef HAVE_EIGEN3

#include <shogun/labels/MulticlassLabels.h>
#include <shogun/mathematics/eigen3.h>

using namespace shogun;
using namespace Eigen;

// Default constructor: the softmax likelihood keeps no state of its own,
// so only the base-class constructor runs.
CSoftMaxLikelihood::CSoftMaxLikelihood() : CLikelihoodModel()
{
}

// Destructor: nothing to release; all members are owned by the base class.
CSoftMaxLikelihood::~CSoftMaxLikelihood()
{
}

/** Returns the log-likelihood log p(y_i|f_i) of each label under the softmax
 * model: log p(y_i|f_i) = f_{i,y_i} - log(sum_c exp(f_{i,c})).
 *
 * @param lab multiclass labels (one of 0..C-1 per sample), must not be NULL
 * @param func latent function values, column-major, length = num_labels * C
 * @return vector of per-sample log-probabilities (length = num_labels)
 */
SGVector<float64_t> CSoftMaxLikelihood::get_log_probability_f(const CLabels* lab,
SGVector<float64_t> func) const
{
REQUIRE(lab, "Labels are required (lab should not be NULL)\n")
REQUIRE(lab->get_label_type()==LT_MULTICLASS,
"Labels must be type of CMulticlassLabels\n")

SGVector<int32_t> labels=((CMulticlassLabels*) lab)->get_int_labels();
// each label must index a valid class column: 0 <= labels[i] < C
for (int32_t i=0;i<labels.vlen;i++)
REQUIRE(((labels[i]>-1)&&(labels[i]<func.vlen/labels.vlen)),
"Labels must be between 0 and C-1 (ie C=%d here). Currently labels[%d] "
"is %d\n",func.vlen/labels.vlen,i,labels[i]);

// labels.vlen=num_rows func.vlen/num_rows=num_cols
// NOTE: eigen_f aliases func.vector, so the in-place shift below mutates
// the caller's buffer and is undone afterwards.
Map<MatrixXd> eigen_f(func.vector,labels.vlen,func.vlen/labels.vlen);

// log_sum_exp trick: subtract the row-wise max before exponentiating to
// avoid overflow, then add it back to the log of the sum.
VectorXd max_coeff=eigen_f.rowwise().maxCoeff();
eigen_f=eigen_f-max_coeff*MatrixXd::Ones(1,eigen_f.cols());
VectorXd log_sum_exp=((eigen_f.array().exp()).rowwise().sum()).array().log();
log_sum_exp=log_sum_exp+max_coeff;

// restore original matrix (undo the in-place shift on func.vector)
eigen_f=eigen_f+max_coeff*MatrixXd::Ones(1,eigen_f.cols());

SGVector<float64_t> ret=SGVector<float64_t>(labels.vlen);
Map<VectorXd> eigen_ret(ret.vector,ret.vlen);

// pick the latent value of the true class for each sample
for (int32_t i=0;i<labels.vlen;i++)
eigen_ret(i)=eigen_f(i,labels[i]);

// log p(y|f) = f_y - log sum_c exp(f_c)
eigen_ret=eigen_ret-log_sum_exp;

return ret;
}

/** Dispatches to the i-th derivative of the log-likelihood wrt the latent
 * function values f.
 *
 * @param lab multiclass labels, must not be NULL
 * @param func latent function values, length = num_labels * C
 * @param i derivative order, must be 1, 2 or 3
 * @return flattened derivative values (layout depends on the order i)
 */
SGVector<float64_t> CSoftMaxLikelihood::get_log_probability_derivative_f(
const CLabels* lab, SGVector<float64_t> func, index_t i) const
{
REQUIRE(lab, "Labels are required (lab should not be NULL)\n")
// reject invalid orders instead of silently computing the 3rd derivative
REQUIRE(i>=1 && i<=3, "Index of derivative (%d) must be 1, 2 or 3\n", i)

int32_t num_rows=lab->get_num_labels();
int32_t num_cols=func.vlen/num_rows;
// wrap func as a num_rows x num_cols matrix without copying (ref=false)
SGMatrix<float64_t> f=SGMatrix<float64_t>(func.vector,num_rows,num_cols,false);

if (i==1)
return get_log_probability_derivative1_f(lab,f);
else if (i==2)
return get_log_probability_derivative2_f(f);
else
return get_log_probability_derivative3_f(f);
}

/** First derivative of the log-softmax likelihood wrt f.
 *
 * For each sample i and class c: d log p(y_i|f_i) / d f_{i,c} = [y_i==c] - s_{i,c},
 * where s = softmax(f) row-wise.
 *
 * @param lab multiclass labels (0..C-1), must not be NULL
 * @param func latent function values as a num_labels x C matrix
 * @return flattened num_rows x num_cols matrix of derivatives
 */
SGVector<float64_t> CSoftMaxLikelihood::get_log_probability_derivative1_f(
const CLabels* lab, SGMatrix<float64_t> func) const
{
REQUIRE(lab, "Labels are required (lab should not be NULL)\n")
REQUIRE(lab->get_label_type()==LT_MULTICLASS,
"Labels must be type of CMulticlassLabels\n")
REQUIRE(lab->get_num_labels()==func.num_rows, "Number of labels must match "
"number of vectors in function\n")

SGVector<int32_t> labels=((CMulticlassLabels*) lab)->get_int_labels();
// each label must index a valid class column: 0 <= labels[i] < C
for (int32_t i=0;i<labels.vlen;i++)
REQUIRE(((labels[i]>-1)&&(labels[i]<func.num_cols)),
"Labels must be between 0 and C-1 (ie C=%d here). Currently labels[%d] "
"is %d\n",func.num_cols,i,labels[i]);

// work on a copy of func so the caller's matrix is untouched
SGVector<float64_t> ret=SGVector<float64_t>(func.num_rows*func.num_cols);
memcpy(ret.vector,func.matrix,func.num_rows*func.num_cols*sizeof(float64_t));

// row-wise softmax: exp(f) divided by the per-row sum
// NOTE(review): exp without the max-shift trick — may overflow for large f
Map<MatrixXd> eigen_ret(ret.vector,func.num_rows,func.num_cols);
eigen_ret=eigen_ret.array().exp();
eigen_ret=eigen_ret.cwiseQuotient(eigen_ret.rowwise().sum()*MatrixXd::Ones(1,func.num_cols));

// one-hot encoding of the labels
MatrixXd y=MatrixXd::Zero(func.num_rows,func.num_cols);

for (int32_t i=0;i<labels.vlen;i++)
y(i,labels[i])=1.;

// d log p / df = y - softmax(f)
eigen_ret=y-eigen_ret;

return ret;
}

// Second derivative of the log-softmax likelihood wrt f.
// For each sample i the C x C Hessian block is s s^T - diag(s), where
// s = softmax(f_i). The result is returned as (num_rows*C) x C, i.e. the
// per-sample blocks stacked vertically.
SGVector<float64_t> CSoftMaxLikelihood::get_log_probability_derivative2_f(SGMatrix<float64_t> func) const
{
SGVector<float64_t> ret=SGVector<float64_t>(func.num_rows*func.num_cols*func.num_cols);
Map<MatrixXd> eigen_ret(ret.vector,func.num_rows*func.num_cols,func.num_cols);

Map<MatrixXd> eigen_f(func.matrix,func.num_rows,func.num_cols);

// row-wise softmax of f
// NOTE(review): exp without a max-shift — may overflow for large f
MatrixXd f1= eigen_f.array().exp();
f1=f1.cwiseQuotient(f1.rowwise().sum()*MatrixXd::Ones(1,f1.cols()));

for (int32_t i=0;i<eigen_f.rows();i++)
{
// outer product s s^T for sample i
eigen_ret.block(i*eigen_f.cols(),0,eigen_f.cols(),eigen_f.cols())=
f1.transpose().col(i)*f1.row(i);
// the diagonal of s s^T is s_k^2, so sqrt recovers s_k itself
VectorXd D=eigen_ret.block(i*eigen_f.cols(),0,eigen_f.cols(),eigen_f.cols())
.diagonal().array().sqrt();
// subtract diag(s): block becomes s s^T - diag(s)
eigen_ret.block(i*eigen_f.cols(),0,eigen_f.cols(),eigen_f.cols())-=
MatrixXd(D.asDiagonal());
}

return ret;
}

// Third derivative of the log-softmax likelihood wrt f.
// For each sample i and class triple (c1,c2,c3), with s = softmax(f_i):
//   d3 log p / df_{c1} df_{c2} df_{c3} =
//     [c1==c2==c3]*s_{c1}
//     - [c1==c2]*s_{c1}*s_{c3} - [c1==c3]*s_{c1}*s_{c2} - [c2==c3]*s_{c1}*s_{c2}
//     + 2*s_{c1}*s_{c2}*s_{c3}
// Returned flattened with index i*C^3 + c1*C^2 + c2*C + c3.
SGVector<float64_t> CSoftMaxLikelihood::get_log_probability_derivative3_f(SGMatrix<float64_t>
func) const
{
SGVector<float64_t> ret=SGVector<float64_t>(CMath::pow(func.num_cols,3)*func.num_rows);

Map<MatrixXd> eigen_f(func.matrix,func.num_rows,func.num_cols);

// row-wise softmax of f
// NOTE(review): exp without a max-shift — may overflow for large f
MatrixXd f1= eigen_f.array().exp();
f1=f1.cwiseQuotient(f1.rowwise().sum()*MatrixXd::Ones(1,f1.cols()));

for (int32_t i=0;i<func.num_rows;i++)
{
for (int32_t c1=0;c1<func.num_cols;c1++)
{
for (int32_t c2=0;c2<func.num_cols;c2++)
{
for (int32_t c3=0;c3<func.num_cols;c3++)
{
// accumulate the Kronecker-delta terms of the formula above
float64_t sum_temp=0;
if ((c1==c2) && (c2==c3))
sum_temp+=f1(i,c1);
if (c1==c2)
sum_temp=sum_temp-f1(i,c1)*f1(i,c3);
if (c1==c3)
sum_temp=sum_temp-f1(i,c1)*f1(i,c2);
if (c2==c3)
// c2==c3 here, so f1(i,c2) stands for s_{c2}=s_{c3}
sum_temp=sum_temp-f1(i,c1)*f1(i,c2);
sum_temp+=2.0*f1(i,c1)*f1(i,c2)*f1(i,c3);

ret[i*CMath::pow(func.num_cols,3)+
c1*CMath::pow(func.num_cols,2)+c2*func.num_cols+c3]=sum_temp;
}
}
}
}

return ret;
}

#endif /* HAVE_EIGEN3 */
Loading