-
-
Notifications
You must be signed in to change notification settings - Fork 1k
/
evaluation_cross_validation_regression.cpp
124 lines (97 loc) · 3.35 KB
/
evaluation_cross_validation_regression.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
/*
* This software is distributed under BSD 3-clause license (see LICENSE file).
*
* Authors: Heiko Strathmann, Soeren Sonnenburg, Jacob Walker, Giovanni De Toni,
* Evgeniy Andreev, Soumyajit De, Viktor Gal, Sergey Lisitsyn
*/
#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/labels/RegressionLabels.h>
#include <shogun/kernel/LinearKernel.h>
#include <shogun/regression/KernelRidgeRegression.h>
#include <shogun/evaluation/CrossValidation.h>
#include <shogun/evaluation/CrossValidationSplitting.h>
#include <shogun/evaluation/MeanSquaredError.h>
using namespace shogun;
/** Forwards one shogun log message verbatim to the given output stream.
 *
 * Registered with init_shogun() as the sink for info, warning and error
 * messages.
 *
 * @param target stream the message is written to (e.g. stdout)
 * @param str    NUL-terminated message text, emitted unmodified
 */
void print_message(FILE* target, const char* str)
{
	/* fputs writes the raw string — equivalent to fprintf(target, "%s", str),
	 * without treating the message as a format string */
	fputs(str, target);
}
/** Demonstrates k-fold cross-validation of kernel ridge regression on a
 * synthetic 1-D linear dataset: builds noisy labels y = x + N(0,1), trains
 * KRR once to sanity-check the training error, then estimates generalization
 * error with repeated 5-fold cross-validation.
 *
 * Compiled only when LAPACK is available, since KRR's solve requires it.
 */
void test_cross_validation()
{
#ifdef HAVE_LAPACK
/* data matrix dimensions: 100 one-dimensional training vectors */
index_t num_vectors=100;
index_t num_features=1;
/* training label data */
SGVector<float64_t> lab(num_vectors);
auto m_rng = std::unique_ptr<CRandom>(new CRandom());
/* fill data matrix with 0..num_vectors-1 and generate labels */
SGMatrix<float64_t> train_dat(num_features, num_vectors);
SGVector<float64_t>::range_fill_vector(train_dat.matrix, num_vectors);
for (index_t i=0; i<num_vectors; ++i)
{
/* labels are the identity line plus unit-variance Gaussian noise */
lab.vector[i] = i + m_rng->normal_random(0, 1.0);
}
/* training features; SG_REF keeps them alive across the SG_UNREFs below */
CDenseFeatures<float64_t>* features=
new CDenseFeatures<float64_t>(train_dat);
SG_REF(features);
/* training labels */
CRegressionLabels* labels=new CRegressionLabels(lab);
/* linear kernel, initialized on the training data for both arguments */
CLinearKernel* kernel=new CLinearKernel();
kernel->init(features, features);
/* kernel ridge regression with small regularizer tau */
float64_t tau=0.0001;
CKernelRidgeRegression* krr=new CKernelRidgeRegression(tau, kernel, labels);
/* evaluation criterion: mean squared error */
CMeanSquaredError* eval_crit=
new CMeanSquaredError();
/* train on the full dataset and print per-sample predictions */
krr->train(features);
CRegressionLabels* output= CLabelsFactory::to_regression(krr->apply());
for (index_t i=0; i<num_vectors; ++i)
{
SG_SPRINT("x=%f, train=%f, predict=%f\n", train_dat.matrix[i],
labels->get_label(i), output->get_label(i));
}
/* evaluate training error (MSE between predictions and labels) */
float64_t eval_result=eval_crit->evaluate(output, labels);
SG_SPRINT("training error: %f\n", eval_result);
SG_UNREF(output);
/* assert that regression "works". this is not guaranteed to always work
 * but should be a really coarse check to see if everything is going
 * approx. right */
ASSERT(eval_result<2);
/* splitting strategy: standard 5-fold partition of the labels */
index_t n_folds=5;
CCrossValidationSplitting* splitting=
new CCrossValidationSplitting(labels, n_folds);
/* cross validation instance; set_num_runs(100) below repeats the whole
 * 5-fold procedure 100 times (the confidence-interval call is disabled) */
CCrossValidation* cross=new CCrossValidation(krr, features, labels,
splitting, eval_crit);
cross->set_num_runs(100);
// cross->set_conf_int_alpha(0.05);
/* actual evaluation; result type is checked before the downcast is used */
CCrossValidationResult* result=(CCrossValidationResult*)cross->evaluate();
if (result->get_result_type() != CROSSVALIDATION_RESULT)
SG_SERROR("Evaluation result is not of type CCrossValidationResult!");
SG_SPRINT("cross_validation estimate:\n");
result->print_result();
/* same crude assertion as for above evaluation: mean CV error stays small */
ASSERT(result->get_mean() < 2);
/* clean up — SG_UNREF(cross) is presumed to release the machine, labels,
 * splitting and criterion it holds; features outlive it via the SG_REF
 * taken above. TODO(review): confirm against CCrossValidation ownership. */
SG_UNREF(result);
SG_UNREF(cross);
SG_UNREF(features);
#endif /* HAVE_LAPACK */
}
/* Program entry point: wires shogun's info/warning/error output to
 * print_message, runs the cross-validation regression demo, and shuts
 * the library down again. Command-line arguments are ignored. */
int main(int argc, char **argv)
{
	/* all three message channels share the same plain-text printer */
	init_shogun(&print_message, &print_message, &print_message);

	test_cross_validation();

	exit_shogun();

	return 0;
}