Skip to content

Commit

Permalink
ID3 modular interface setup+ID3 API example added
Browse files Browse the repository at this point in the history
  • Loading branch information
mazumdarparijat committed Mar 19, 2014
1 parent c34fd60 commit ef57566
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 0 deletions.
@@ -0,0 +1,42 @@
#!/usr/bin/env python
from numpy import array

# create data
# Toy categorical dataset for the ID3 example: each ROW is one nominal
# attribute (values encoded as 1.0/2.0/3.0), each COLUMN is one example.
# Training set: 4 attributes x 15 examples.
train_data = array([[1.0, 2.0, 1.0, 3.0, 1.0, 3.0, 2.0, 2.0, 3.0, 1.0, 2.0, 2.0, 3.0, 1.0, 2.0],
[2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 1.0, 2.0, 2.0, 2.0, 1.0, 2.0, 2.0, 2.0, 1.0],
[3.0, 2.0, 3.0, 3.0, 3.0, 2.0, 2.0, 1.0, 3.0, 1.0, 2.0, 1.0, 3.0, 1.0, 2.0],
[1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 2.0, 1.0, 1.0, 2.0, 2.0, 1.0, 1.0]])

# One class label (1.0/2.0/3.0) per training example (15 total).
train_labels = array([1.0, 2.0, 1.0, 3.0, 1.0, 2.0, 2.0, 1.0, 3.0, 1.0, 2.0, 1.0, 3.0, 1.0, 2.0])

# Test set: same 4 attributes, 5 unlabeled examples to classify.
test_data = array([[2.0, 2.0, 1.0, 3.0, 3.0],
[2.0, 1.0, 2.0, 1.0, 2.0],
[3.0, 2.0, 1.0, 3.0, 2.0],
[1.0, 2.0, 1.0, 2.0, 1.0]])

# Argument sets consumed by shogun's example/testsuite runner; each inner
# list is one invocation's positional arguments.
parameter_list = [[train_data, train_labels, test_data]]

def multiclass_id3classifiertree_modular(train=train_data,labels=train_labels,test=test_data):
	"""Train an ID3 decision tree and classify the given test data.

	train  -- numpy matrix of categorical features, one attribute per row
	labels -- numpy vector with one multiclass label per training column
	test   -- numpy matrix of test examples, same attribute layout as train

	Returns a (tree, predicted_labels) tuple, or None when the modshogun
	extension module is not installed (examples are skipped silently).
	"""
	try:
		from modshogun import RealFeatures, MulticlassLabels, ID3ClassifierTree
	except ImportError:
		# Shogun's modular python interface is unavailable -- skip the example.
		return

	# Wrap the raw numpy arrays in Shogun container objects.
	training_features = RealFeatures(train)
	testing_features = RealFeatures(test)
	training_labels = MulticlassLabels(labels)

	# Build the ID3 tree: attach labels first, then grow it on the features.
	tree = ID3ClassifierTree()
	tree.set_labels(training_labels)
	tree.train(training_features)

	# Classify the held-out examples and pull the predictions out as a vector.
	predictions = tree.apply_multiclass(testing_features).get_labels()

	return tree, predictions

# Run the example directly: execute the first parameter set and announce it.
if __name__=='__main__':
	print('ID3ClassifierTree')
	multiclass_id3classifiertree_modular(*parameter_list[0])
4 changes: 4 additions & 0 deletions src/interfaces/modular/Multiclass.i
Expand Up @@ -16,6 +16,7 @@
%rename(RelaxedTreeNodeData) CRelaxedTreeNodeData;
%rename(TreeMachineNode) CTreeMachineNode;
%rename(VwConditionalProbabilityTree) VwConditionalProbabilityTree;
%rename(ID3ClassifierTree) CID3ClassifierTree;

%rename(RejectionStrategy) CRejectionStrategy;
%rename(ThresholdRejectionStrategy) CThresholdRejectionStrategy;
Expand Down Expand Up @@ -70,10 +71,12 @@
%include <shogun/multiclass/tree/TreeMachine.h>
%include <shogun/multiclass/tree/RelaxedTreeNodeData.h>
%include <shogun/multiclass/tree/ConditionalProbabilityTreeNodeData.h>
%include <shogun/multiclass/tree/ID3TreeNodeData.h>
namespace shogun
{
%template(TreeMachineWithConditionalProbabilityTreeNodeData) CTreeMachine<ConditionalProbabilityTreeNodeData>;
%template(TreeMachineWithRelaxedTreeNodeData) CTreeMachine<RelaxedTreeNodeData>;
%template(TreeMachineWithID3TreeNodeData) CTreeMachine<id3TreeNodeData>;
}

%include <shogun/multiclass/tree/ConditionalProbabilityTree.h>
Expand All @@ -82,6 +85,7 @@ namespace shogun
%include <shogun/multiclass/tree/RelaxedTree.h>
%include <shogun/multiclass/tree/TreeMachineNode.h>
%include <shogun/multiclass/tree/VwConditionalProbabilityTree.h>
%include <shogun/multiclass/tree/ID3ClassifierTree.h>

%include <shogun/multiclass/RejectionStrategy.h>
%include <shogun/multiclass/MulticlassStrategy.h>
Expand Down
2 changes: 2 additions & 0 deletions src/interfaces/modular/Multiclass_includes.i
Expand Up @@ -9,6 +9,8 @@
#include <shogun/multiclass/tree/RelaxedTreeUtil.h>
#include <shogun/multiclass/tree/TreeMachineNode.h>
#include <shogun/multiclass/tree/VwConditionalProbabilityTree.h>
#include <shogun/multiclass/tree/ID3TreeNodeData.h>
#include <shogun/multiclass/tree/ID3ClassifierTree.h>

#include <shogun/multiclass/RejectionStrategy.h>
#include <shogun/multiclass/MulticlassStrategy.h>
Expand Down
20 changes: 20 additions & 0 deletions src/shogun/multiclass/tree/ID3ClassifierTree.h
Expand Up @@ -44,25 +44,45 @@ namespace shogun
* values using the ID3 algorithm. The training algorithm implemented is as follows :
*
* function ID3 (R: a set of non-categorical attributes,
*
* C: the categorical attribute,
*
* S: a training set) returns a decision tree;
*
* begin
*
* If S consists of records all with the same value for
*
* the categorical attribute,
*
* return a single node with that value;
*
* If R is empty, then return a single node with as value
*
* the most frequent of the values of the categorical attribute
*
* that are found in records of S; [note that then there
*
* will be errors, that is, records that will be improperly
*
* classified];
*
* Let D be the attribute with largest Gain(D,S)
*
* among attributes in R;
*
* Let \f${d_j| j=1,2, .., m}\f$ be the values of attribute D;
*
* Let \f${S_j| j=1,2, .., m}\f$ be the subsets of S consisting
*
* respectively of records with value dj for attribute D;
*
* Return a tree with root labeled D and arcs labeled
*
* \f$d_1, d_2, .., d_m\f$ going respectively to the trees
*
* ID3(R-{D}, C, \f$S_1\f$), .., ID3(R-{D}, C, \f$S_m\f$);
*
* end ID3;
*
*/
Expand Down

0 comments on commit ef57566

Please sign in to comment.