-
-
Notifications
You must be signed in to change notification settings - Fork 1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Relaxed Tree #679
Relaxed Tree #679
Changes from 37 commits
784e98a
77e9276
f46eb29
bc41584
c726de7
eea0de0
e06995c
3eb3a6e
f5db5e2
9592235
1037b98
877117b
407df54
dbf4671
dab1256
911f456
da3eaa8
62b53f4
eb8ae6b
1d28ec7
aa6410d
a9f1d2b
055bac5
ab2fad0
d1ce23f
714da66
8a30ff6
322685e
84dcea2
c69baaf
11389ec
1d74145
66ff8a1
c22eed2
9dfcfb0
706e86e
e998b74
daf2a13
5993b24
8041148
3c40d1e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,6 +23,7 @@ configure.log | |
*.swo | ||
*.mex* | ||
*.oct | ||
.localvimrc | ||
|
||
# /doc/ | ||
/doc/html | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
#include <algorithm> | ||
|
||
#include <shogun/labels/MulticlassLabels.h> | ||
#include <shogun/io/streaming/StreamingAsciiFile.h> | ||
#include <shogun/io/SGIO.h> | ||
#include <shogun/features/streaming/StreamingDenseFeatures.h> | ||
#include <shogun/features/DenseFeatures.h> | ||
#include <shogun/features/DenseSubsetFeatures.h> | ||
#include <shogun/base/init.h> | ||
#include <shogun/multiclass/tree/RelaxedTree.h> | ||
#include <shogun/multiclass/MulticlassLibLinear.h> | ||
#include <shogun/evaluation/MulticlassAccuracy.h> | ||
#include <shogun/kernel/GaussianKernel.h> | ||
|
||
#define EPSILON 1e-5 | ||
|
||
using namespace shogun; | ||
|
||
int main(int argc, char** argv) | ||
{ | ||
int32_t num_vectors = 0; | ||
int32_t num_feats = 0; | ||
|
||
init_shogun_with_defaults(); | ||
|
||
const char*fname_train = "../data/7class_example4_train.dense"; | ||
CStreamingAsciiFile *train_file = new CStreamingAsciiFile(fname_train); | ||
SG_REF(train_file); | ||
|
||
CStreamingDenseFeatures<float64_t> *stream_features = new CStreamingDenseFeatures<float64_t>(train_file, true, 1024); | ||
SG_REF(stream_features); | ||
|
||
SGMatrix<float64_t> mat; | ||
SGVector<float64_t> labvec(1000); | ||
|
||
stream_features->start_parser(); | ||
SGVector< float64_t > vec; | ||
int32_t num_vec=0; | ||
while (stream_features->get_next_example()) | ||
{ | ||
vec = stream_features->get_vector(); | ||
if (num_feats == 0) | ||
{ | ||
num_feats = vec.vlen; | ||
mat = SGMatrix<float64_t>(num_feats, 1000); | ||
} | ||
std::copy(vec.vector, vec.vector+vec.vlen, mat.get_column_vector(num_vectors)); | ||
labvec[num_vectors] = stream_features->get_label(); | ||
num_vectors++; | ||
stream_features->release_example(); | ||
num_vec++; | ||
|
||
if (num_vec > 20000) | ||
break; | ||
} | ||
stream_features->end_parser(); | ||
mat.num_cols = num_vectors; | ||
labvec.vlen = num_vectors; | ||
|
||
CMulticlassLabels* labels = new CMulticlassLabels(labvec); | ||
SG_REF(labels); | ||
|
||
// Create features with the useful values from mat | ||
CDenseFeatures< float64_t >* features = new CDenseFeatures<float64_t>(mat); | ||
SG_REF(features); | ||
|
||
// Create RelaxedTree Machine | ||
CRelaxedTree *machine = new CRelaxedTree(); | ||
SG_REF(machine); | ||
machine->set_labels(labels); | ||
CKernel *kernel = new CGaussianKernel(); | ||
SG_REF(kernel); | ||
machine->set_kernel(kernel); | ||
|
||
CMulticlassLibLinear *svm = new CMulticlassLibLinear(); | ||
|
||
machine->set_machine_for_confusion_matrix(svm); | ||
machine->train(features); | ||
|
||
|
||
CMulticlassLabels* output = CMulticlassLabels::obtain_from_generic(machine->apply()); | ||
|
||
CMulticlassAccuracy *evaluator = new CMulticlassAccuracy(); | ||
SG_SPRINT("Accuracy = %.4f\n", evaluator->evaluate(output, labels)); | ||
|
||
// Free resources | ||
SG_UNREF(machine); | ||
SG_UNREF(output); | ||
SG_UNREF(features); | ||
SG_UNREF(labels); | ||
SG_UNREF(train_file); | ||
SG_UNREF(stream_features); | ||
SG_UNREF(evaluator); | ||
SG_UNREF(kernel); | ||
|
||
exit_shogun(); | ||
|
||
return 0; | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
import classifier_multiclass_shared | ||
|
||
# Load the shared multiclass example data (train/test matrices and labels).
traindat, label_traindat, testdat, label_testdat = \
    classifier_multiclass_shared.prepare_data(True)

# Argument sets for classifier_multiclass_relaxedtree; the two entries differ
# only in the fifth value (2.1 vs 2.2).
parameter_list = [
    [traindat, testdat, label_traindat, label_testdat, 2.1, 1, 1e-5],
    [traindat, testdat, label_traindat, label_testdat, 2.2, 1, 1e-5],
]
|
||
def classifier_multiclass_relaxedtree (fm_train_real=traindat,fm_test_real=testdat,label_train_multiclass=label_traindat,label_test_multiclass=label_testdat,lawidth=2.1,C=1,epsilon=1e-5):
    """Train a RelaxedTree on the training data and predict the test data.

    A RelaxedTree is configured with a default-constructed
    MulticlassLibLinear (for the confusion matrix) and a default-constructed
    GaussianKernel, trained on ``fm_train_real`` / ``label_train_multiclass``
    and applied to ``fm_test_real``.  When ``label_test_multiclass`` is not
    None, the multiclass accuracy of the prediction is printed as well.

    ``lawidth``, ``C`` and ``epsilon`` are accepted but not used by the body.

    Returns the predicted labels as returned by ``get_labels()``.
    """
    from shogun.Features import RealFeatures, MulticlassLabels
    from shogun.Classifier import RelaxedTree, MulticlassLibLinear
    from shogun.Kernel import GaussianKernel

    print('Working on a problem of %d features and %d samples' % fm_train_real.shape)

    train_features = RealFeatures(fm_train_real)
    train_labels = MulticlassLabels(label_train_multiclass)

    # Configure the tree, then train it.
    tree = RelaxedTree()
    tree.set_machine_for_confusion_matrix(MulticlassLibLinear())
    tree.set_kernel(GaussianKernel())
    tree.set_labels(train_labels)
    tree.train(train_features)

    test_features = RealFeatures(fm_test_real)
    predicted = tree.apply_multiclass(test_features)
    predicted_values = predicted.get_labels()

    # Without ground-truth test labels there is nothing to evaluate.
    if label_test_multiclass is None:
        return predicted_values

    from shogun.Evaluation import MulticlassAccuracy
    truth = MulticlassLabels(label_test_multiclass)
    accuracy = MulticlassAccuracy().evaluate(predicted, truth)
    print('Accuracy = %.4f' % accuracy)

    return predicted_values
|
||
# Script entry point: run the example with the first parameter set.
if __name__ == '__main__':
    print('MulticlassMachine')
    first_parameters = parameter_list[0]
    classifier_multiclass_relaxedtree(*first_parameters)
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -116,6 +116,24 @@ class CSGObject | |
int32_t unref(); | ||
#endif //USE_REFERENCE_COUNTING | ||
|
||
/** A shallow copy. | ||
* All the SGObject instance variables will be simply assigned and SG_REF-ed. | ||
*/ | ||
virtual CSGObject *shalow_copy() const | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. typo: should be shallow There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hi, I'm sorry for the typo. This is fixed now. |
||
{ | ||
SG_NOTIMPLEMENTED; | ||
return NULL; | ||
} | ||
|
||
/** A deep copy. | ||
* All the instance variables will also be copied. | ||
* | ||
* This version performs no copy itself: it invokes SG_NOTIMPLEMENTED | ||
* and, if that macro returns control, yields NULL. | ||
* | ||
* @return NULL in this version | ||
*/ | ||
virtual CSGObject *deep_copy() const | ||
{ | ||
SG_NOTIMPLEMENTED; | ||
return NULL; | ||
} | ||
|
||
/** Returns the name of the SGSerializable instance. It MUST BE | ||
* the CLASS NAME without the prefixed `C'. | ||
* | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
/* | ||
* This program is free software; you can redistribute it and/or modify | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what do you need the kernel factories for? they really seem to be not necessary... There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hi, the problem is that I will have to create Kernels in the run-time. Since I do not know what the type of the kernel the user supplied, I have to use the kernelfactory. Or I do not know what to new. If you know any better solution here, please tell me. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't really get it then - in your kernel factory you are creating a kernel with default settings only - e.g. gaussian kernel with certain width. How can the user supply a certain width to be used? Wouldn't it be better to have some clone() / copy constructor for the kernel objects to create a clone of the kernel with its parameters? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. And why do you have to create a kernel? Couldn't the user just supply you a kernel object? At least this is what we do for any SVM... There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If CKernel has a clone() method, I can do this. Because I have to create multiple Kernel instances. Yes, the built-in KernelFactory can only use default parameters. If the users want to customize parameters, he has to write a subclass of the KernelFactory himself. This is not an elegant solution, but I think this is currently the most straightforward solution. 
I have written an email to the list to explain my struggling for this. Hope I clarified the problem clearly. And I'd be very glad to see anyone come up with a better solution for the whole thing. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why are kernel factories still there now that you use shallow_copy() ? |
||
* it under the terms of the GNU General Public License as published by | ||
* the Free Software Foundation; either version 3 of the License, or | ||
* (at your option) any later version. | ||
* | ||
* Written (W) 2012 Chiyuan Zhang | ||
* Copyright (C) 2012 Chiyuan Zhang | ||
*/ | ||
|
||
#ifndef GENERICKERNELFACTORY_H__ | ||
#define GENERICKERNELFACTORY_H__ | ||
|
||
#include <shogun/kernel/KernelFactory.h> | ||
|
||
namespace shogun | ||
{ | ||
|
||
template <typename KernelType> | ||
class CGenericKernelFactory | ||
:public CKernelFactory | ||
{ | ||
public: | ||
/** constructor */ | ||
CGenericKernelFactory() {} | ||
|
||
/** destructor */ | ||
virtual ~CGenericKernelFactory() {} | ||
|
||
/** get name */ | ||
virtual const char* get_name() const { return "GenericKernelFactory"; } | ||
|
||
/** construct a *new* kernel */ | ||
virtual CKernel *make_kernel() | ||
{ | ||
CKernel *kernel = new KernelType(); | ||
SG_REF(kernel); | ||
return kernel; | ||
} | ||
}; | ||
|
||
} /* shogun */ | ||
|
||
#endif /* end of include guard: GENERICKERNELFACTORY_H__ */ | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
/* | ||
* This program is free software; you can redistribute it and/or modify | ||
* it under the terms of the GNU General Public License as published by | ||
* the Free Software Foundation; either version 3 of the License, or | ||
* (at your option) any later version. | ||
* | ||
* Written (W) 2012 Chiyuan Zhang | ||
* Copyright (C) 2012 Chiyuan Zhang | ||
*/ | ||
|
||
#ifndef KERNELFACTORY_H__ | ||
#define KERNELFACTORY_H__ | ||
|
||
#include <shogun/kernel/Kernel.h> | ||
|
||
namespace shogun | ||
{ | ||
|
||
class CKernelFactory: public CSGObject | ||
{ | ||
public: | ||
/** constructor */ | ||
CKernelFactory() {} | ||
|
||
/** destructor */ | ||
virtual ~CKernelFactory() {} | ||
|
||
/** get name */ | ||
virtual const char* get_name() const { return "KernelFactory"; } | ||
|
||
/** construct a *new* kernel */ | ||
virtual CKernel *make_kernel()=0; | ||
}; | ||
|
||
} /* shogun */ | ||
|
||
#endif /* end of include guard: KERNELFACTORY_H__ */ | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,6 +12,8 @@ | |
#ifndef __SGVECTOR_H__ | ||
#define __SGVECTOR_H__ | ||
|
||
#include <algorithm> | ||
|
||
#include <shogun/io/SGIO.h> | ||
#include <shogun/lib/DataType.h> | ||
#include <shogun/lib/SGReferencedData.h> | ||
|
@@ -471,6 +473,57 @@ template<class T> class SGVector : public SGReferencedData | |
const SGVector<T>, const char* name="vector", | ||
const char* prefix=""); | ||
|
||
/** find the indices at which an element occurs | ||
* @param elem the element to find | ||
*/ | ||
SGVector<index_t> find(T elem); | ||
|
||
/** find index for elements where the predicate returns true | ||
* @param p the predicate, it should accept the value of the element and return a bool | ||
*/ | ||
template <typename Predicate> | ||
SGVector<index_t> find_if(Predicate p) | ||
{ | ||
SGVector<index_t> idx(vlen); | ||
index_t k=0; | ||
|
||
for (index_t i=0; i < vlen; ++i) | ||
if (p(vector[i])) | ||
idx[k++] = i; | ||
|
||
idx.vlen = k; | ||
return idx; | ||
} | ||
|
||
/** Helper functor for the function sorted_index */ | ||
struct IndexSorter | ||
{ | ||
IndexSorter(const SGVector<T> *vec) { data = vec->vector; } | ||
|
||
bool operator() (index_t i, index_t j) const | ||
{ | ||
return data[i] < data[j]; | ||
} | ||
|
||
const T* data; | ||
}; | ||
/** get sorted index. | ||
* | ||
* idx = v.sorted_index() is similar to Matlab [~, idx] = sort(v) | ||
* | ||
* @return sorted index for this vector | ||
*/ | ||
SGVector<index_t> sorted_index() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. please use the CMath::qsort_index function here (at least inside this function) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hi, I tried to modify this. But I failed because this will introduce circular dependency for CMath and SGVector. Any suggestion for how to fix this? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. doesn't a forward declaration such as
`class CMath;` and then code depending on it work w/o any math include? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm afraid this won't work. Forward class declaration only tells the compiler there is a class named this, so that you can use some limited things like the pointer type (CMath *). But you cannot do things that depend on the content of the class definition. For example, calling CMath's method isn't allowed here because the compiler doesn't know which methods CMath has now (by merely seeing a statement "class CMath"). |
||
{ | ||
IndexSorter cmp(this); | ||
SGVector<index_t> idx(vlen); | ||
for (index_t i=0; i < vlen; ++i) | ||
idx[i] = i; | ||
|
||
std::sort(idx.vector, idx.vector+vlen, cmp); | ||
|
||
return idx; | ||
} | ||
|
||
protected: | ||
/** needs to be overridden to copy data */ | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
why do you use streaming features if you in the end only work with a dense feature matrix?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Because the label for the dataset is embedded in the feature file. I'm using the streaming features to extract the labels.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I guess we need to provide another load routine in dense features - but later.