New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Relaxed Tree #679
Relaxed Tree #679
Changes from all commits
784e98a
77e9276
f46eb29
bc41584
c726de7
eea0de0
e06995c
3eb3a6e
f5db5e2
9592235
1037b98
877117b
407df54
dbf4671
dab1256
911f456
da3eaa8
62b53f4
eb8ae6b
1d28ec7
aa6410d
a9f1d2b
055bac5
ab2fad0
d1ce23f
714da66
8a30ff6
322685e
84dcea2
c69baaf
11389ec
1d74145
66ff8a1
c22eed2
9dfcfb0
706e86e
e998b74
daf2a13
5993b24
8041148
3c40d1e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,6 +23,7 @@ configure.log | |
*.swo | ||
*.mex* | ||
*.oct | ||
.localvimrc | ||
|
||
# /doc/ | ||
/doc/html | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
#include <algorithm> | ||
|
||
#include <shogun/labels/MulticlassLabels.h> | ||
#include <shogun/io/streaming/StreamingAsciiFile.h> | ||
#include <shogun/io/SGIO.h> | ||
#include <shogun/features/streaming/StreamingDenseFeatures.h> | ||
#include <shogun/features/DenseFeatures.h> | ||
#include <shogun/features/DenseSubsetFeatures.h> | ||
#include <shogun/base/init.h> | ||
#include <shogun/multiclass/tree/RelaxedTree.h> | ||
#include <shogun/multiclass/MulticlassLibLinear.h> | ||
#include <shogun/evaluation/MulticlassAccuracy.h> | ||
#include <shogun/kernel/GaussianKernel.h> | ||
|
||
#define EPSILON 1e-5 | ||
|
||
using namespace shogun; | ||
|
||
int main(int argc, char** argv) | ||
{ | ||
int32_t num_vectors = 0; | ||
int32_t num_feats = 0; | ||
|
||
init_shogun_with_defaults(); | ||
|
||
const char*fname_train = "../data/7class_example4_train.dense"; | ||
CStreamingAsciiFile *train_file = new CStreamingAsciiFile(fname_train); | ||
SG_REF(train_file); | ||
|
||
CStreamingDenseFeatures<float64_t> *stream_features = new CStreamingDenseFeatures<float64_t>(train_file, true, 1024); | ||
SG_REF(stream_features); | ||
|
||
SGMatrix<float64_t> mat; | ||
SGVector<float64_t> labvec(1000); | ||
|
||
stream_features->start_parser(); | ||
SGVector< float64_t > vec; | ||
int32_t num_vec=0; | ||
while (stream_features->get_next_example()) | ||
{ | ||
vec = stream_features->get_vector(); | ||
if (num_feats == 0) | ||
{ | ||
num_feats = vec.vlen; | ||
mat = SGMatrix<float64_t>(num_feats, 1000); | ||
} | ||
std::copy(vec.vector, vec.vector+vec.vlen, mat.get_column_vector(num_vectors)); | ||
labvec[num_vectors] = stream_features->get_label(); | ||
num_vectors++; | ||
stream_features->release_example(); | ||
num_vec++; | ||
|
||
if (num_vec > 20000) | ||
break; | ||
} | ||
stream_features->end_parser(); | ||
mat.num_cols = num_vectors; | ||
labvec.vlen = num_vectors; | ||
|
||
CMulticlassLabels* labels = new CMulticlassLabels(labvec); | ||
SG_REF(labels); | ||
|
||
// Create features with the useful values from mat | ||
CDenseFeatures< float64_t >* features = new CDenseFeatures<float64_t>(mat); | ||
SG_REF(features); | ||
|
||
// Create RelaxedTree Machine | ||
CRelaxedTree *machine = new CRelaxedTree(); | ||
SG_REF(machine); | ||
machine->set_labels(labels); | ||
CKernel *kernel = new CGaussianKernel(); | ||
SG_REF(kernel); | ||
machine->set_kernel(kernel); | ||
|
||
CMulticlassLibLinear *svm = new CMulticlassLibLinear(); | ||
|
||
machine->set_machine_for_confusion_matrix(svm); | ||
machine->train(features); | ||
|
||
|
||
CMulticlassLabels* output = CMulticlassLabels::obtain_from_generic(machine->apply()); | ||
|
||
CMulticlassAccuracy *evaluator = new CMulticlassAccuracy(); | ||
SG_SPRINT("Accuracy = %.4f\n", evaluator->evaluate(output, labels)); | ||
|
||
// Free resources | ||
SG_UNREF(machine); | ||
SG_UNREF(output); | ||
SG_UNREF(features); | ||
SG_UNREF(labels); | ||
SG_UNREF(train_file); | ||
SG_UNREF(stream_features); | ||
SG_UNREF(evaluator); | ||
SG_UNREF(kernel); | ||
|
||
exit_shogun(); | ||
|
||
return 0; | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
import classifier_multiclass_shared | ||
|
||
[traindat, label_traindat, testdat, label_testdat] = classifier_multiclass_shared.prepare_data(True) | ||
|
||
parameter_list = [[traindat,testdat,label_traindat,label_testdat,2.1,1,1e-5],[traindat,testdat,label_traindat,label_testdat,2.2,1,1e-5]] | ||
|
||
def classifier_multiclass_relaxedtree (fm_train_real=traindat,fm_test_real=testdat,label_train_multiclass=label_traindat,label_test_multiclass=label_testdat,lawidth=2.1,C=1,epsilon=1e-5):
	"""Train a RelaxedTree multiclass machine on dense real features and
	return the predicted test labels, printing the accuracy when ground
	truth for the test set is available."""
	from shogun.Features import RealFeatures, MulticlassLabels
	from shogun.Classifier import RelaxedTree, MulticlassLibLinear
	from shogun.Kernel import GaussianKernel

	print('Working on a problem of %d features and %d samples' % fm_train_real.shape)

	# Wrap the raw training matrix and label array for shogun.
	train_feats = RealFeatures(fm_train_real)
	train_labels = MulticlassLabels(label_train_multiclass)

	# Assemble the tree: a LibLinear solver builds the confusion matrix,
	# and a Gaussian kernel drives the per-node binary splits.
	tree = RelaxedTree()
	tree.set_machine_for_confusion_matrix(MulticlassLibLinear())
	tree.set_kernel(GaussianKernel())
	tree.set_labels(train_labels)
	tree.train(train_feats)

	# Predict on the test set.
	predictions = tree.apply_multiclass(RealFeatures(fm_test_real))
	out = predictions.get_labels()

	# Report accuracy only when test-set ground truth was supplied.
	if label_test_multiclass is not None:
		from shogun.Evaluation import MulticlassAccuracy
		truth = MulticlassLabels(label_test_multiclass)
		acc = MulticlassAccuracy().evaluate(predictions, truth)
		print('Accuracy = %.4f' % acc)

	return out
|
||
if __name__ == '__main__':
	print('MulticlassMachine')
	# Run the example with the first parameter set.
	args = parameter_list[0]
	classifier_multiclass_relaxedtree(*args)
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,6 +12,8 @@ | |
#ifndef __SGVECTOR_H__ | ||
#define __SGVECTOR_H__ | ||
|
||
#include <algorithm> | ||
|
||
#include <shogun/io/SGIO.h> | ||
#include <shogun/lib/DataType.h> | ||
#include <shogun/lib/SGReferencedData.h> | ||
|
@@ -471,6 +473,57 @@ template<class T> class SGVector : public SGReferencedData | |
const SGVector<T>, const char* name="vector", | ||
const char* prefix=""); | ||
|
||
/** Find the index (or indices) of occurrence of an element.
 *
 * Definition lives in the .cpp; presumably returns the indices of
 * every element equal to elem, analogous to find_if — verify there.
 *
 * @param elem the element to find
 */
SGVector<index_t> find(T elem);
|
||
/** find index for elements where the predicate returns true | ||
* @param p the predicate, it should accept the value of the element and return a bool | ||
*/ | ||
template <typename Predicate> | ||
SGVector<index_t> find_if(Predicate p) | ||
{ | ||
SGVector<index_t> idx(vlen); | ||
index_t k=0; | ||
|
||
for (index_t i=0; i < vlen; ++i) | ||
if (p(vector[i])) | ||
idx[k++] = i; | ||
|
||
idx.vlen = k; | ||
return idx; | ||
} | ||
|
||
/** Helper functor for the function sorted_index */ | ||
struct IndexSorter | ||
{ | ||
IndexSorter(const SGVector<T> *vec) { data = vec->vector; } | ||
|
||
bool operator() (index_t i, index_t j) const | ||
{ | ||
return data[i] < data[j]; | ||
} | ||
|
||
const T* data; | ||
}; | ||
/** get sorted index. | ||
* | ||
* idx = v.sorted_index() is similar to Matlab [~, idx] = sort(v) | ||
* | ||
* @return sorted index for this vector | ||
*/ | ||
SGVector<index_t> sorted_index() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. please use the CMath::qsort_index function here (at least inside this function) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hi, I tried to modify this. But I failed because this will introduce circular dependency for CMath and SGVector. Any suggestion for how to fix this? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. doesn't
and then code depending on it work w/o any math include work? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm afraid this won't work. Forward class declaration only tells the compiler there is a class named this, so that you can use some limited things like the pointer type (CMath *). But you cannot do things that depends on the content of the class definition. For example, calling CMath's method isn't allowed here because the compiler doesn't know what method CMath has now (by merely seeing a statement "class CMath"). |
||
{ | ||
IndexSorter cmp(this); | ||
SGVector<index_t> idx(vlen); | ||
for (index_t i=0; i < vlen; ++i) | ||
idx[i] = i; | ||
|
||
std::sort(idx.vector, idx.vector+vlen, cmp); | ||
|
||
return idx; | ||
} | ||
|
||
protected: | ||
/** needs to be overridden to copy data */ | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why do you use streaming features if, in the end, you only work with a dense feature matrix?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Because the label for the dataset is embedded in the feature file. I'm using the streaming features to extract the labels.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I guess we need to provide another load routine in dense features - but later.