Skip to content

Commit

Permalink
Merge pull request #228 from karlnapf/master
Browse files Browse the repository at this point in the history
first working grid search with a kernel
  • Loading branch information
Soeren Sonnenburg committed Jul 22, 2011
2 parents 31d21d3 + 77d27f6 commit 9d9ea5c
Show file tree
Hide file tree
Showing 6 changed files with 182 additions and 29 deletions.
3 changes: 2 additions & 1 deletion examples/undocumented/libshogun/Makefile
Expand Up @@ -21,7 +21,8 @@ TARGETS = basic_minimal classifier_libsvm classifier_minimal_svm \
modelselection_model_selection_parameters_test \
modelselection_parameter_tree \
modelselection_apply_parameter_tree \
modelselection_grid_search_simple features_subset_labels \
modelselection_grid_search_linear features_subset_labels \
modelselection_grid_search_kernel\
features_subset_simple_features \
features_subset_sparse_features \
mathematics_confidence_intervals \
Expand Down
146 changes: 146 additions & 0 deletions examples/undocumented/libshogun/modelselection_grid_search_kernel.cpp
@@ -0,0 +1,146 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2011 Heiko Strathmann
* Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
*/

#include <shogun/base/init.h>
#include <shogun/evaluation/CrossValidation.h>
#include <shogun/evaluation/ContingencyTableEvaluation.h>
#include <shogun/evaluation/StratifiedCrossValidationSplitting.h>
#include <shogun/modelselection/GridSearchModelSelection.h>
#include <shogun/modelselection/ModelSelectionParameters.h>
#include <shogun/modelselection/ParameterCombination.h>
#include <shogun/features/Labels.h>
#include <shogun/features/SimpleFeatures.h>
#include <shogun/classifier/svm/LibSVM.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/kernel/PowerKernel.h>
#include <shogun/distance/MinkowskiMetric.h>


using namespace shogun;

/** Writes a message string verbatim to the given output stream.
 *
 * Registered with init_shogun() below as the print/warning/error handler.
 *
 * @param target stream to write to (e.g. stdout or stderr)
 * @param str    NUL-terminated message text, written without interpretation
 */
void print_message(FILE* target, const char* str)
{
	fputs(str, target);
}

/** Builds the model-selection parameter tree searched by the grid search.
 *
 * Tree layout:
 *   root
 *    +- "C1"       exponential range [-5, 5]
 *    +- "C2"       exponential range [-5, 5]
 *    +- "kernel"  (CGaussianKernel)
 *    |    +- "width"   exponential range [-5, 5] with extra args (1, 2)
 *    +- "kernel"  (CPowerKernel)
 *         +- "degree"  exponential range [1, 1] (single candidate)
 *         +- "distance" (CMinkowskiMetric(10))
 *              +- "k"   linear range [1, 12]
 *
 * @return root of the freshly built parameter tree
 */
CModelSelectionParameters* create_param_tree()
{
	CModelSelectionParameters* tree_root=new CModelSelectionParameters();

	/* SVM regularization constants, searched on an exponential grid */
	CModelSelectionParameters* param_c1=new CModelSelectionParameters("C1");
	param_c1->build_values(-5, 5, R_EXP);
	tree_root->append_child(param_c1);

	CModelSelectionParameters* param_c2=new CModelSelectionParameters("C2");
	param_c2->build_values(-5, 5, R_EXP);
	tree_root->append_child(param_c2);

	/* first kernel candidate: Gaussian, with its width searched */
	CGaussianKernel* kernel_gaussian=new CGaussianKernel();
	CModelSelectionParameters* param_kernel_gaussian=
			new CModelSelectionParameters("kernel", kernel_gaussian);
	CModelSelectionParameters* param_gaussian_width=
			new CModelSelectionParameters("width");
	param_gaussian_width->build_values(-5, 5, R_EXP, 1, 2);
	param_kernel_gaussian->append_child(param_gaussian_width);
	tree_root->append_child(param_kernel_gaussian);

	/* second kernel candidate: power kernel with degree and a metric subtree */
	CPowerKernel* kernel_power=new CPowerKernel();
	CModelSelectionParameters* param_kernel_power=
			new CModelSelectionParameters("kernel", kernel_power);
	tree_root->append_child(param_kernel_power);

	CModelSelectionParameters* param_power_degree=
			new CModelSelectionParameters("degree");
	param_power_degree->build_values(1, 1, R_EXP);
	param_kernel_power->append_child(param_power_degree);

	CMinkowskiMetric* metric=new CMinkowskiMetric(10);
	CModelSelectionParameters* param_power_metric=
			new CModelSelectionParameters("distance", metric);
	param_kernel_power->append_child(param_power_metric);

	CModelSelectionParameters* param_power_metric_k=
			new CModelSelectionParameters("k");
	param_power_metric_k->build_values(1, 12, R_LINEAR);
	param_power_metric->append_child(param_power_metric_k);

	return tree_root;
}

/** Entry point: demonstrates grid-search model selection with kernels.
 *
 * Creates random Gaussian data with alternating +1/-1 labels, sets up a
 * LibSVM classifier under stratified cross-validation, grid-searches the
 * parameter tree from create_param_tree(), applies the best combination to
 * the classifier, and re-evaluates it with a confidence interval.
 */
int main(int argc, char **argv)
{
init_shogun(&print_message, &print_message, &print_message);

/* toy-example dimensions: folds, number of vectors, vector dimension */
int32_t num_subsets=3;
int32_t num_vectors=20;
int32_t dim_vectors=3;

/* create some data and labels */
float64_t* matrix=new float64_t[num_vectors*dim_vectors];
CLabels* labels=new CLabels(num_vectors);
for (int32_t i=0; i<num_vectors*dim_vectors; i++)
matrix[i]=CMath::randn_double();

/* wrap the random matrix (dim_vectors x num_vectors) as dense features;
 * matrix is not freed here — presumably set_feature_matrix takes ownership
 * of the buffer, TODO confirm against CSimpleFeatures docs */
CSimpleFeatures<float64_t>* features=new CSimpleFeatures<float64_t> ();
features->set_feature_matrix(matrix, dim_vectors, num_vectors);

/* create labels, two alternating classes (+1/-1) */
for (index_t i=0; i<num_vectors; ++i)
labels->set_label(i, i%2==0 ? 1 : -1);

/* create SVM classifier; its kernel is supplied later by the grid search */
CLibSVM* classifier=new CLibSVM();
classifier->set_store_sv_features(true);

/* splitting strategy: stratified keeps the class balance in every fold */
CStratifiedCrossValidationSplitting* splitting_strategy=
new CStratifiedCrossValidationSplitting(labels, num_subsets);

/* accuracy as the evaluation criterium */
CContingencyTableEvaluation* evaluation_criterium=
new CContingencyTableEvaluation(ACCURACY);

/* cross validation class for evaluation in model selection, 3 repetitions */
CCrossValidation* cross=new CCrossValidation(classifier, features, labels,
splitting_strategy, evaluation_criterium);
cross->set_num_runs(3);

/* model parameter selection, deletion is handled by modsel class (SG_UNREF) */
CModelSelectionParameters* param_tree=create_param_tree();
param_tree->print_tree();

/* this is on the stack and handles all of the above structures in memory */
CGridSearchModelSelection grid_search(param_tree, cross);

/* run the search and report the winning parameter combination */
CParameterCombination* best_combination=grid_search.select_model();
SG_SPRINT("best parameter(s):\n");
best_combination->print_tree();

/* write the winning parameters back into the classifier */
best_combination->apply_to_machine(classifier);

/* larger number of runs to have tighter confidence intervals */
cross->set_num_runs(100);
cross->set_conf_int_alpha(0.01);
CrossValidationResult result=cross->evaluate();
SG_SPRINT("result: ");
result.print_result();

/* clean up: destroy result parameter */
SG_UNREF(best_combination);

SG_SPRINT("\nEND\n");
exit_shogun();

return 0;
}

31 changes: 16 additions & 15 deletions src/shogun/evaluation/CrossValidation.cpp
Expand Up @@ -58,7 +58,7 @@ void CCrossValidation::init()
m_splitting_strategy=NULL;
m_evaluation_criterium=NULL;
m_num_runs=1;
m_conf_int_p=0;
m_conf_int_alpha=0;

m_parameters->add((CSGObject**) &m_machine, "machine",
"Used learning machine");
Expand All @@ -69,7 +69,7 @@ void CCrossValidation::init()
m_parameters->add((CSGObject**) &m_evaluation_criterium,
"evaluation_criterium", "Used evaluation criterium");
m_parameters->add(&m_num_runs, "num_runs", "Number of repetitions");
m_parameters->add(&m_conf_int_p, "conf_int_p", "p-value of confidence "
m_parameters->add(&m_conf_int_alpha, "conf_int_alpha", "alpha-value of confidence "
"interval");
}

Expand All @@ -80,42 +80,43 @@ Parameter* CCrossValidation::get_machine_parameters() const

CrossValidationResult CCrossValidation::evaluate()
{
float64_t* results=new float64_t[m_num_runs];
SGVector<float64_t> results(m_num_runs);

for (index_t i=0; i<m_num_runs; ++i)
results[i]=evaluate_one_run();
results.vector[i]=evaluate_one_run();

/* construct evaluation result */
CrossValidationResult result;
result.value=CStatistics::mean(SGVector<float64_t>(results, m_num_runs));
result.has_conf_int=m_conf_int_p!=0;
result.conf_int_p=m_conf_int_p;
result.has_conf_int=m_conf_int_alpha!=0;
result.conf_int_alpha=m_conf_int_alpha;

if (result.has_conf_int)
{
/* TODO: calculate confidence interval, maybe put this into CMath? */
SG_NOTIMPLEMENTED;
result.conf_int_alpha=m_conf_int_alpha;
result.mean=CStatistics::confidence_intervals_mean(results,
result.conf_int_alpha, result.conf_int_low, result.conf_int_up);
}
else
{
result.mean=CStatistics::mean(results);
result.conf_int_low=0;
result.conf_int_up=0;
}

delete[] results;
delete[] results.vector;

return result;
}

void CCrossValidation::set_conf_int_p(float64_t conf_int_p)
void CCrossValidation::set_conf_int_alpha(float64_t conf_int_alpha)
{
if (conf_int_p<0||conf_int_p>=1)
if (conf_int_alpha<0||conf_int_alpha>=1)
{
SG_ERROR("%f is an illegal p-value for confidence interval of "
"cross-validation\n", conf_int_p);
SG_ERROR("%f is an illegal alpha-value for confidence interval of "
"cross-validation\n", conf_int_alpha);
}

m_conf_int_p=conf_int_p;
m_conf_int_alpha=conf_int_alpha;
}

void CCrossValidation::set_num_runs(int32_t num_runs)
Expand Down
23 changes: 14 additions & 9 deletions src/shogun/evaluation/CrossValidation.h
Expand Up @@ -23,23 +23,28 @@ class CLabels;
class CSplittingStrategy;
class CEvaluation;

/** @brief type to encapsulate the results of an evaluation run.
* May contain confidence interval (if conf_int_alpha!=0).
* m_conf_int_alpha is the probability for an error, i.e. the value does not lie
* in the confidence interval.
*/
typedef struct
{
float64_t value;
float64_t mean;
bool has_conf_int;
float64_t conf_int_low;
float64_t conf_int_up;
float64_t conf_int_p;
float64_t conf_int_alpha;

void print_result()
{
if (has_conf_int)
{
SG_SPRINT("[%f,%f] with p=%f, value=%f\n", conf_int_low, conf_int_up,
conf_int_p);
SG_SPRINT("[%f,%f] with alpha=%f, mean=%f\n", conf_int_low, conf_int_up,
conf_int_alpha, mean);
}
else
SG_SPRINT("%f\n", value);
SG_SPRINT("%f\n", mean);
}
} CrossValidationResult;

Expand Down Expand Up @@ -91,8 +96,8 @@ class CCrossValidation: public CSGObject

/** method for evaluation. Performs cross-validation.
* Is repeated m_num_runs. If this number is larger than one, a confidence
* interval is calculated if m_conf_int_p is (0<p<1).
* By default m_num_runs=1 and m_conf_int_p=0
* interval is calculated if m_conf_int_alpha is (0<p<1).
* By default m_num_runs=1 and m_conf_int_alpha=0
*
* @return result of evaluation
*/
Expand All @@ -105,7 +110,7 @@ class CCrossValidation: public CSGObject
void set_num_runs(int32_t num_runs);

/** setter for the number of runs to use for evaluation */
void set_conf_int_p(float64_t conf_int_p);
void set_conf_int_alpha(float64_t m_conf_int_alpha);

/** @return name of the SGSerializable */
inline virtual const char* get_name() const
Expand All @@ -129,7 +134,7 @@ class CCrossValidation: public CSGObject

private:
int32_t m_num_runs;
float64_t m_conf_int_p;
float64_t m_conf_int_alpha;

CMachine* m_machine;
CFeatures* m_features;
Expand Down
8 changes: 4 additions & 4 deletions src/shogun/modelselection/GridSearchModelSelection.cpp
Expand Up @@ -44,9 +44,9 @@ CParameterCombination* CGridSearchModelSelection::select_model()

CParameterCombination* best_combination=NULL;
if (m_cross_validation->get_evaluation_direction()==ED_MAXIMIZE)
best_result.value=CMath::ALMOST_NEG_INFTY;
best_result.mean=CMath::ALMOST_NEG_INFTY;
else
best_result.value=CMath::ALMOST_INFTY;
best_result.mean=CMath::ALMOST_INFTY;

/* apply all combinations and search for best one */
for (index_t i=0; i<combinations->get_num_elements(); ++i)
Expand All @@ -59,7 +59,7 @@ CParameterCombination* CGridSearchModelSelection::select_model()
/* check if current result is better, delete old combinations */
if (m_cross_validation->get_evaluation_direction()==ED_MAXIMIZE)
{
if (result.value>best_result.value)
if (result.mean>best_result.mean)
{
if (best_combination)
SG_UNREF(best_combination);
Expand All @@ -75,7 +75,7 @@ CParameterCombination* CGridSearchModelSelection::select_model()
}
else
{
if (result.value<best_result.value)
if (result.mean<best_result.mean)
{
if (best_combination)
SG_UNREF(best_combination);
Expand Down

0 comments on commit 9d9ea5c

Please sign in to comment.