pegasos_optimize.cpp
// Distributed under GNU General Public License (see license.txt for details).
//
// Copyright (c) 2007 Shai Shalev-Shwartz.
// All Rights Reserved.

#include <shogun/optimization/pegasos/pegasos_optimize.h>

#include <cmath>
#include <cstdlib>
#include <vector>

using namespace shogun;
// ------------------------------------------------------------//
// ---------------- OPTIMIZING --------------------------------//
// ------------------------------------------------------------//
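// CPegasos::Learn runs the Pegasos stochastic sub-gradient solver of
// Shalev-Shwartz, Singer and Srebro (2007) for the SVM primal objective
//
//     F(w) = lambda/2 * ||w||^2 + 1/m * sum_i max(0, 1 - y_i <w, x_i>),
//
// where m = num_examples. Each of the max_iter iterations draws exam_per_iter
// random examples, performs the update
//
//     w <- (1 - eta*lambda) * w + eta/exam_per_iter * sum_{violators} y_i x_i
//
// with the step size eta chosen according to eta_rule_type, optionally
// projects w back onto a norm ball (projection_rule), and averages the
// last num_iter_to_avg iterates into the returned solution AvgW.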
SGVector<float64_t> CPegasos::Learn(// Input variables
		CDotFeatures* features,
		SGVector<float64_t> labels,
		int dimension,
		double lambda, int max_iter, int exam_per_iter, int num_iter_to_avg,
		// Output variables
		double& obj_value, double& norm_value, double& loss_value,
		// additional parameters
		int eta_rule_type, double eta_constant,
		int projection_rule, double projection_constant)
{
	int num_examples = features->get_num_vectors();

	// Initialize the classification vector W and its running average AvgW
	SGVector<float64_t> W(dimension);
	W.zero();
	SGVector<float64_t> AvgW(dimension);
	AvgW.zero();
	double avgScale = (num_iter_to_avg > max_iter) ? max_iter : num_iter_to_avg;

	// ---------------- Main Loop -------------------
	for (int i = 0; i < max_iter; ++i) {
		// learning rate
		double eta;
		if (eta_rule_type == 0) { // Pegasos eta rule: eta ~ 1/(lambda*t)
			eta = 1.0 / (lambda * (i+2));
		} else if (eta_rule_type == 1) { // Norma rule: eta ~ c/sqrt(t)
			eta = eta_constant / sqrt(i+2.0);
		} else { // constant learning rate
			eta = eta_constant;
		}

		// indices and weights of the examples contributing to the sub-gradient
		std::vector<uint32_t> grad_index;
		std::vector<double> grad_weights;

		// calc sub-gradients
		for (int j=0; j < exam_per_iter; ++j) {
			// choose a random example
			uint32_t r = ((uint32_t) rand()) % num_examples;
			// calculate prediction
			double prediction = features->dense_dot_sgvec(r, W);
			// calculate hinge loss
			double cur_loss = 1 - labels[r]*prediction;
			if (cur_loss < 0.0) cur_loss = 0.0;
			// examples with positive loss contribute to the sub-gradient
			if (cur_loss > 0.0) {
				grad_index.push_back(r);
				grad_weights.push_back(eta*labels[r]/exam_per_iter);
			}
		}
		// scale W: W <- (1 - eta*lambda) * W
		double scaling = 1.0 - eta*lambda;
		if (scaling == 0.0)
			W.zero();
		else
			SGVector<float64_t>::scale_vector(scaling, W.vector, W.vlen);

		// and add the sub-gradients of the violating examples
		for (uint32_t j=0; j<grad_index.size(); ++j) {
			features->add_to_dense_vec(grad_weights[j], grad_index[j], W.vector, W.vlen);
		}
		// Project if needed
		if (projection_rule == 0) { // Pegasos projection rule: keep ||W|| <= 1/sqrt(lambda)
			double norm = SGVector<float64_t>::twonorm(W.vector, W.vlen);
			double snorm = norm*norm; // squared L2 norm
			if (snorm > 1.0/lambda) {
				SGVector<float64_t>::scale_vector(sqrt(1.0/(lambda*snorm)), W.vector, W.vlen);
			}
		} else if (projection_rule == 1) { // project onto a ball of radius projection_constant
			double norm = SGVector<float64_t>::twonorm(W.vector, W.vlen);
			if (norm > projection_constant) {
				SGVector<float64_t>::scale_vector(projection_constant/norm, W.vector, W.vlen);
			}
		} // else -- no projection
		// and update AvgW
		if (max_iter <= num_iter_to_avg + i)
			for (int j=0; j<dimension; j++)
				AvgW[j] += W[j]/avgScale;
	}
	// Calculate objective value: lambda/2 * ||AvgW||^2 + average hinge loss
	norm_value = SGVector<float64_t>::twonorm(AvgW.vector, AvgW.vlen);
	obj_value = norm_value * norm_value * lambda / 2.0;
	loss_value = 0.0;
	for (int i=0; i < num_examples; ++i) {
		double cur_loss = 1 - labels[i]*features->dense_dot_sgvec(i, AvgW);
		if (cur_loss < 0.0) cur_loss = 0.0;
		loss_value += cur_loss/num_examples;
		obj_value += cur_loss/num_examples;
	}

	return AvgW;
}
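
// ------------------------------------------------------------//
// The block below is a minimal usage sketch, not part of the solver. It
// assumes dense features stored in a CDenseFeatures<float64_t> object (a
// CDotFeatures subclass) and +/-1 labels; exact headers and initialization
// calls may differ between shogun versions, so the sketch is kept disabled.
#if 0
#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>

int main()
{
	init_shogun_with_defaults();

	// toy 2D data: two points per class, stored column-major (dim x num_examples)
	SGMatrix<float64_t> data(2, 4);
	data(0,0)=1;  data(1,0)=1;        // class +1
	data(0,1)=2;  data(1,1)=1;        // class +1
	data(0,2)=-1; data(1,2)=-1;       // class -1
	data(0,3)=-2; data(1,3)=-1;       // class -1

	SGVector<float64_t> labels(4);
	labels[0]=1; labels[1]=1; labels[2]=-1; labels[3]=-1;

	CDenseFeatures<float64_t>* features = new CDenseFeatures<float64_t>(data);

	double obj=0, norm=0, loss=0;
	SGVector<float64_t> w = CPegasos::Learn(features, labels,
			/*dimension=*/2, /*lambda=*/0.1,
			/*max_iter=*/1000, /*exam_per_iter=*/1, /*num_iter_to_avg=*/100,
			obj, norm, loss,
			/*eta_rule_type=*/0, /*eta_constant=*/0.0,
			/*projection_rule=*/0, /*projection_constant=*/0.0);

	SG_UNREF(features);
	exit_shogun();
	return 0;
}
#endif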