From fbd979c770049c395c2f5e622ee802bf30970157 Mon Sep 17 00:00:00 2001
From: OXPHOS
Date: Mon, 21 Nov 2016 23:20:41 -0500
Subject: [PATCH] cookbook_cartree

---
 data                                          |  2 +-
 .../multiclass_classifier/cartree.rst         | 47 +++++++++++++++++++
 .../meta/src/multiclass_classifier/cartree.sg | 36 ++++++++++++++
 .../multiclass_cartree_modular.py             | 37 ---------------
 4 files changed, 84 insertions(+), 38 deletions(-)
 create mode 100644 doc/cookbook/source/examples/multiclass_classifier/cartree.rst
 create mode 100644 examples/meta/src/multiclass_classifier/cartree.sg
 delete mode 100644 examples/undocumented/python_modular/multiclass_cartree_modular.py

diff --git a/data b/data
index 4a79db623b9..0ef31924307 160000
--- a/data
+++ b/data
@@ -1 +1 @@
-Subproject commit 4a79db623b97434b1e9038cb1d0c0ff2997d6855
+Subproject commit 0ef3192430753d689b6700ca93c31173d4b38055
diff --git a/doc/cookbook/source/examples/multiclass_classifier/cartree.rst b/doc/cookbook/source/examples/multiclass_classifier/cartree.rst
new file mode 100644
index 00000000000..5fc90b8c8f1
--- /dev/null
+++ b/doc/cookbook/source/examples/multiclass_classifier/cartree.rst
@@ -0,0 +1,47 @@
+==================================
+Classification And Regression Tree
+==================================
+
+Decision tree learning uses a decision tree as a predictive model which maps observations about an item to conclusions about the item's target value.
+
+Decision trees are mostly of the following two types:
+
+- Classification tree, where the predicted outcome is the class to which the data belongs.
+- Regression tree, where the predicted outcome can be considered a real number.
+
+The Classification And Regression Tree (CART) algorithm is an umbrella method that can be used to generate both classification and regression trees.
+
+In this example, we show how to apply the CART algorithm to a multi-class dataset and predict its labels with a classification tree.
+
+-------
+Example
+-------
+
+Imagine we have files with training and test data. We create :sgclass:`CDenseFeatures` (here 64 bit floats aka RealFeatures) and :sgclass:`CMulticlassLabels` as
+
+.. sgexample:: cartree.sg:create_features
+
+We set the type of each predictive attribute (true for nominal, false for ordinal/continuous).
+
+.. sgexample:: cartree.sg:set_attribute_types
+
+We create an instance of the :sgclass:`CCARTree` classifier by passing it the attribute types and the tree type.
+We can also set the number of subsets used in cross-validation and whether to use cross-validation pruning.
+
+.. sgexample:: cartree.sg:create_instance
+
+Then we train and apply it to test data, which here gives :sgclass:`CMulticlassLabels`.
+
+.. sgexample:: cartree.sg:train_and_apply
+
+We can evaluate test performance via e.g. :sgclass:`CMulticlassAccuracy`.
+
+.. sgexample:: cartree.sg:evaluate_accuracy
+
+----------
+References
+----------
+
+:wiki:`Decision_tree_learning`
+
+:wiki:`Predictive_analytics#Classification_and_regression_trees_.28CART.29`
diff --git a/examples/meta/src/multiclass_classifier/cartree.sg b/examples/meta/src/multiclass_classifier/cartree.sg
new file mode 100644
index 00000000000..48772e95ec5
--- /dev/null
+++ b/examples/meta/src/multiclass_classifier/cartree.sg
@@ -0,0 +1,36 @@
+CSVFile f_feats_train("../../data/classifier_4class_2d_linear_features_train.dat")
+CSVFile f_feats_test("../../data/classifier_4class_2d_linear_features_test.dat")
+CSVFile f_labels_train("../../data/classifier_4class_2d_linear_labels_train.dat")
+CSVFile f_labels_test("../../data/classifier_4class_2d_linear_labels_test.dat")
+Math::init_random(1)
+
+#![create_features]
+RealFeatures features_train(f_feats_train)
+RealFeatures features_test(f_feats_test)
+MulticlassLabels labels_train(f_labels_train)
+MulticlassLabels labels_test(f_labels_test)
+#![create_features]
+
+#![set_attribute_types]
+BoolVector ft(2)
+ft[0] = False
+ft[1] = False
+#![set_attribute_types]
+
+#![create_instance]
+CARTree classifier(ft, enum EProblemType.PT_MULTICLASS, 5, True)
+classifier.set_labels(labels_train)
+#![create_instance]
+
+#![train_and_apply]
+classifier.train(features_train)
+MulticlassLabels labels_predict = classifier.apply_multiclass(features_test)
+#![train_and_apply]
+
+#![evaluate_accuracy]
+MulticlassAccuracy eval()
+real accuracy = eval.evaluate(labels_predict, labels_test)
+#![evaluate_accuracy]
+
+# integration testing variables
+RealVector output = labels_predict.get_labels()
diff --git a/examples/undocumented/python_modular/multiclass_cartree_modular.py b/examples/undocumented/python_modular/multiclass_cartree_modular.py
deleted file mode 100644
index d0d870356ae..00000000000
--- a/examples/undocumented/python_modular/multiclass_cartree_modular.py
+++ /dev/null
@@ -1,37 +0,0 @@
-#!/usr/bin/env python
-from numpy import array
-
-traindat = '../data/fm_train_real.dat'
-testdat = '../data/fm_test_real.dat'
-label_traindat = '../data/label_train_multiclass.dat'
-
-# set both input attributes as not nominal (ie. continuous)
-feattypes = array([False, False])
-
-parameter_list = [[traindat,testdat,label_traindat,feattypes]]
-
-def multiclass_cartree_modular(train=traindat,test=testdat,labels=label_traindat,ft=feattypes):
-    try:
-        from modshogun import RealFeatures, MulticlassLabels, CSVFile, CARTree, PT_MULTICLASS
-    except ImportError:
-        print("Could not import Shogun modules")
-        return
-
-    # wrap features and labels into Shogun objects
-    feats_train=RealFeatures(CSVFile(train))
-    feats_test=RealFeatures(CSVFile(test))
-    train_labels=MulticlassLabels(CSVFile(labels))
-
-    # CART Tree formation with 5 fold cross-validation pruning
-    c=CARTree(ft,PT_MULTICLASS,5,True)
-    c.set_labels(train_labels)
-    c.train(feats_train)
-
-    # Classify test data
-    output=c.apply_multiclass(feats_test).get_labels()
-
-    return c,output
-
-if __name__=='__main__':
-    print('CARTree')
-    multiclass_cartree_modular(*parameter_list[0])
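
For reference, a minimal Python sketch of the same workflow described by the new cookbook page, assembled only from calls that appear in the removed multiclass_cartree_modular.py and the new cartree.sg (RealFeatures, MulticlassLabels, CSVFile, CARTree with PT_MULTICLASS, MulticlassAccuracy). The `modshogun` import name assumes a modular Python build of this era, and the relative data path is an assumption about where the script is run from, not something this patch ships.

    #!/usr/bin/env python
    # Minimal sketch of the cartree.sg workflow in Python, assuming the
    # modular "modshogun" bindings; only calls used by the removed
    # multiclass_cartree_modular.py and the new meta example appear here.
    from numpy import array
    from modshogun import RealFeatures, MulticlassLabels, CSVFile
    from modshogun import CARTree, PT_MULTICLASS, MulticlassAccuracy

    # path to the shogun data submodule (an assumption -- adjust as needed)
    data_dir = '../../data'

    # wrap the CSV files into Shogun features and labels
    feats_train = RealFeatures(CSVFile(data_dir + '/classifier_4class_2d_linear_features_train.dat'))
    feats_test = RealFeatures(CSVFile(data_dir + '/classifier_4class_2d_linear_features_test.dat'))
    labels_train = MulticlassLabels(CSVFile(data_dir + '/classifier_4class_2d_linear_labels_train.dat'))
    labels_test = MulticlassLabels(CSVFile(data_dir + '/classifier_4class_2d_linear_labels_test.dat'))

    # both attributes are continuous, i.e. not nominal
    ft = array([False, False])

    # CART tree for multi-class problems, 5 subsets for cross-validation pruning
    classifier = CARTree(ft, PT_MULTICLASS, 5, True)
    classifier.set_labels(labels_train)
    classifier.train(feats_train)

    # classify the test data and measure multi-class accuracy
    labels_predict = classifier.apply_multiclass(feats_test)
    accuracy = MulticlassAccuracy().evaluate(labels_predict, labels_test)
    print('accuracy = %.4f' % accuracy)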