#!/usr/bin/env python
from numpy import array

# Discrete-valued training features: 4 attributes x 15 examples.
train_data = array([[1.0, 2.0, 1.0, 3.0, 1.0, 3.0, 2.0, 2.0, 3.0, 1.0, 2.0, 2.0, 3.0, 1.0, 2.0],
                    [2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 1.0, 2.0, 2.0, 2.0, 1.0, 2.0, 2.0, 2.0, 1.0],
                    [3.0, 2.0, 3.0, 3.0, 3.0, 2.0, 2.0, 1.0, 3.0, 1.0, 2.0, 1.0, 3.0, 1.0, 2.0],
                    [1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 2.0, 1.0, 1.0, 2.0, 2.0, 1.0, 1.0]])

# One multiclass label (1.0, 2.0 or 3.0) per training example.
train_labels = array([1.0, 2.0, 1.0, 3.0, 1.0, 2.0, 2.0, 1.0, 3.0, 1.0, 2.0, 1.0, 3.0, 1.0, 2.0])

# Test features: same 4 attributes, 5 examples to classify.
test_data = array([[2.0, 2.0, 1.0, 3.0, 3.0],
                   [2.0, 1.0, 2.0, 1.0, 2.0],
                   [3.0, 2.0, 1.0, 3.0, 2.0],
                   [1.0, 2.0, 1.0, 2.0, 1.0]])

parameter_list = [[train_data, train_labels, test_data]]

def multiclass_id3classifiertree_modular(train=train_data,labels=train_labels,test=test_data):
    """Train an ID3 classifier tree on (train, labels) and classify test.

    Returns a (tree, predicted_labels) pair, or None when the modshogun
    extension module is not installed (standard convention for these
    examples so the test runner can skip them gracefully).
    """
    try:
        from modshogun import RealFeatures, MulticlassLabels, ID3ClassifierTree
    except ImportError:
        return

    # Wrap the raw numpy arrays in Shogun feature/label containers.
    shogun_train = RealFeatures(train)
    shogun_test = RealFeatures(test)
    shogun_labels = MulticlassLabels(labels)

    # Grow the ID3 decision tree from the labelled training data.
    tree = ID3ClassifierTree()
    tree.set_labels(shogun_labels)
    tree.train(shogun_train)

    # Predict one label per test example.
    predictions = tree.apply_multiclass(shogun_test).get_labels()

    return tree, predictions

if __name__=='__main__':
    print('ID3ClassifierTree')
    multiclass_id3classifiertree_modular(*parameter_list[0])
%rename(TreeMachineNode) CTreeMachineNode; %rename(VwConditionalProbabilityTree) VwConditionalProbabilityTree; +%rename(ID3ClassifierTree) CID3ClassifierTree; %rename(RejectionStrategy) CRejectionStrategy; %rename(ThresholdRejectionStrategy) CThresholdRejectionStrategy; @@ -70,10 +71,12 @@ %include %include %include +%include namespace shogun { %template(TreeMachineWithConditionalProbabilityTreeNodeData) CTreeMachine; %template(TreeMachineWithRelaxedTreeNodeData) CTreeMachine; + %template(TreeMachineWithID3TreeNodeData) CTreeMachine; } %include @@ -82,6 +85,7 @@ namespace shogun %include %include %include +%include %include %include diff --git a/src/interfaces/modular/Multiclass_includes.i b/src/interfaces/modular/Multiclass_includes.i index 55e38f0b0dd..251a0ab4ea5 100644 --- a/src/interfaces/modular/Multiclass_includes.i +++ b/src/interfaces/modular/Multiclass_includes.i @@ -9,6 +9,8 @@ #include #include #include + #include + #include #include #include diff --git a/src/shogun/multiclass/tree/ID3ClassifierTree.h b/src/shogun/multiclass/tree/ID3ClassifierTree.h index e759d2f64bc..e0ed3574cf7 100644 --- a/src/shogun/multiclass/tree/ID3ClassifierTree.h +++ b/src/shogun/multiclass/tree/ID3ClassifierTree.h @@ -43,26 +43,29 @@ namespace shogun /** @brief class ID3ClassifierTree, implements classifier tree for discrete feature * values using the ID3 algorithm. 
The training algorithm implemented is as follows : * - * function ID3 (R: a set of non-categorical attributes, - * C: the categorical attribute, - * S: a training set) returns a decision tree; - * begin - * If S consists of records all with the same value for - * the categorical attribute, + * function ID3 (R: a set of non-categorical attributes, C: the categorical attribute, S: a training set)\n + * returns a decision tree; + * + * begin \n + * If S consists of records all with the same value for the categorical attribute,\n * return a single node with that value; - * If R is empty, then return a single node with as value - * the most frequent of the values of the categorical attribute - * that are found in records of S; [note that then there - * will be errors, that is, records that will be improperly - * classified]; + * + * If R is empty,\n + * return a single node with as value the most frequent + * of the values of the categorical attribute that are found in records of S;\n + * [note that then there will be errors, that is, records that will be improperly classified]; + * * Let D be the attribute with largest Gain(D,S) - * among attributes in R; - * Let \f${d_j| j=1,2, .., m}\f$ be the values of attribute D; + * among attributes in R; + * + * Let \f${d_j| j=1,2, .., m}\f$ be the values of attribute D;\n * Let \f${S_j| j=1,2, .., m}\f$ be the subsets of S consisting - * respectively of records with value dj for attribute D; + * respectively of records with value dj for attribute D; + * * Return a tree with root labeled D and arcs labeled - * \f$d_1, d_2, .., d_m\f$ going respectively to the trees + * \f$d_1, d_2, .., d_m\f$ going respectively to the trees \n * ID3(R-{D}, C, \f$S_1\f$), .., ID3(R-{D}, C, \f$S_m\f$); + * * end ID3; * */