From 9cdce5cb489e4b38791b57a64dd6d1fd4806178e Mon Sep 17 00:00:00 2001 From: OXPHOS Date: Thu, 18 Aug 2016 17:57:12 -0700 Subject: [PATCH] fix boolvector --- .../multiclass_classifier/cartree.rst | 47 +++++++++++++++++++ .../meta/src/multiclass_classifier/cartree.sg | 35 ++++++++++++++ 2 files changed, 82 insertions(+) create mode 100644 doc/cookbook/source/examples/multiclass_classifier/cartree.rst create mode 100644 examples/meta/src/multiclass_classifier/cartree.sg diff --git a/doc/cookbook/source/examples/multiclass_classifier/cartree.rst b/doc/cookbook/source/examples/multiclass_classifier/cartree.rst new file mode 100644 index 00000000000..5fc90b8c8f1 --- /dev/null +++ b/doc/cookbook/source/examples/multiclass_classifier/cartree.rst @@ -0,0 +1,47 @@ +================================== +Classification And Regression Tree +================================== + +Decision tree learning uses a decision tree as a predictive model which maps observations about an item to conclusions about the item's target value. + +Decision trees are mostly used as the following two types: + +- Classification tree, where the predicted outcome is the class to which the data belongs. +- Regression tree, where the predicted outcome can be considered a real number. + +The Classification And Regression Tree (CART) algorithm is an umbrella method that can be applied to generate both classification trees and regression trees. + +In this example, we show how to apply the CART algorithm to a multi-class dataset and predict the labels with a classification tree. + +------- +Example +------- + +Imagine we have files with training and test data. We create CDenseFeatures (here 64 bit floats aka RealFeatures) and :sgclass:`CMulticlassLabels` as + +.. sgexample:: cartree.sg:create_features + +We set the type of each predictive attribute (true for nominal, false for ordinal/continuous) + +.. 
sgexample:: cartree.sg:set_attribute_types + +We create an instance of the :sgclass:`CCARTree` classifier by passing it the attribute types and the tree type. +We can also set the number of subsets used in cross-validation and whether to use cross-validation pruning. + +.. sgexample:: cartree.sg:create_instance + +Then we train and apply it to test data, which here gives :sgclass:`CMulticlassLabels`. + +.. sgexample:: cartree.sg:train_and_apply + +We can evaluate test performance via e.g. :sgclass:`CMulticlassAccuracy`. + +.. sgexample:: cartree.sg:evaluate_accuracy + +---------- +References +---------- + +:wiki:`Decision_tree_learning` + +:wiki:`Predictive_analytics#Classification_and_regression_trees_.28CART.29` diff --git a/examples/meta/src/multiclass_classifier/cartree.sg b/examples/meta/src/multiclass_classifier/cartree.sg new file mode 100644 index 00000000000..96d809b5063 --- /dev/null +++ b/examples/meta/src/multiclass_classifier/cartree.sg @@ -0,0 +1,35 @@ +CSVFile f_feats_train("../../data/classifier_4class_2d_linear_features_train.dat") +CSVFile f_feats_test("../../data/classifier_4class_2d_linear_features_test.dat") +CSVFile f_labels_train("../../data/classifier_4class_2d_linear_labels_train.dat") +CSVFile f_labels_test("../../data/classifier_4class_2d_linear_labels_test.dat") + +#![create_features] +RealFeatures features_train(f_feats_train) +RealFeatures features_test(f_feats_test) +MulticlassLabels labels_train(f_labels_train) +MulticlassLabels labels_test(f_labels_test) +#![create_features] + +#![set_attribute_types] +BoolVector ft(2) +ft[0] = 0 +ft[1] = 0 +#![set_attribute_types] + +#![create_instance] +CARTree classifier(ft,enum EProblemType.PT_MULTICLASS, 5, True) +classifier.set_labels(labels_train) +#![create_instance] + +#![train_and_apply] +classifier.train(features_train) +MulticlassLabels labels_predict = classifier.apply_multiclass(features_test) +#![train_and_apply] + +#![evaluate_accuracy] +MulticlassAccuracy eval() +real accuracy = 
eval.evaluate(labels_predict, labels_test) +#![evaluate_accuracy] + +# integration testing variables +RealVector output = labels_predict.get_labels()