[![mlpack-lab Image](https://img.shields.io/endpoint?url=https%3A%2F%2Flab.kurg.org%2Fstatus%2Fstatus.json)](https://lab.mlpack.org)

In [1]:
/**
 * @file covertype-rf-cpp.ipynb
 *
 * Classification using Random Forest on the Covertype dataset.
 */

In [2]:
!wget -O - https://lab.mlpack.org/data/covertype-small.csv.gz | gunzip -c > covertype-small.csv

In [3]:
#include <mlpack/core.hpp>
#include <mlpack/core/data/split_data.hpp>
#include <mlpack/methods/random_forest/random_forest.hpp>

In [4]:
// Bring the top-level mlpack namespace into scope so that data::Load() and
// data::Split() can be written without the mlpack:: prefix in later cells.
using namespace mlpack;

In [5]:
// Bring mlpack::tree into scope for the unqualified RandomForest<> used when
// the model is constructed.
// NOTE(review): presumably this targets an mlpack release in which
// RandomForest lives in mlpack::tree; newer releases may no longer provide
// this namespace -- confirm against the installed version.
using namespace mlpack::tree;

In [6]:
// Load the dataset that we downloaded.
//
// The third argument (fatal = true) makes a failed load (missing file,
// interrupted download, unparsable CSV) raise an error right here.  Without it,
// data::Load() only returns false, and since that return value was not checked
// the following cells would run on an empty matrix and fail far from the
// actual cause.
arma::mat dataset;
data::Load("covertype-small.csv", dataset, true /* fatal on failure */);

In [7]:
// The class label of every point is stored in the final row of the matrix:
// copy that row out as integer labels, then remove it so that only the
// feature rows remain in 'dataset'.
const arma::uword labelRow = dataset.n_rows - 1;
arma::Row<size_t> labels =
    arma::conv_to<arma::Row<size_t>>::from(dataset.row(labelRow));
dataset.shed_row(labelRow);

In [8]:
// Fraction of the dataset held out for testing (0.3 == 30%).
const double testRatio = 0.3;

// Destinations for the two partitions: features and their matching labels.
arma::mat trainSet;
arma::mat testSet;
arma::Row<size_t> trainLabels;
arma::Row<size_t> testLabels;

// Randomly partition the points, together with their labels, into a training
// set and a test set.
data::Split(dataset, labels, trainSet, testSet, trainLabels, testLabels,
    testRatio);

In [9]:
// Hyperparameters for the forest.
// NOTE(review): Covertype has 7 cover types; 8 presumably leaves room for
// labels stored as 1..7 with label 0 unused -- confirm against the data.
const size_t numClasses = 8;
const size_t numTrees = 10;

// Build the random forest from the training split.
RandomForest<> rf(trainSet, trainLabels, numClasses, numTrees);

In [10]:
// Predict the labels of the test points.  Classify() writes its predictions
// into 'output', which the accuracy computation then compares element-wise
// against testLabels.
arma::Row<size_t> output;
rf.Classify(testSet, output);

In [11]:
// Count the test points whose predicted label equals the true label, then
// report that count together with the accuracy expressed as a percentage.
// Class probabilities (not computed in this notebook) could additionally be
// used to generate an ROC curve.
const size_t correct = arma::accu(output == testLabels);
const double accuracy = 100.0 * correct / testLabels.n_elem;
std::cout << correct << " correct out of " << testLabels.n_elem << " ("
          << accuracy << ")." << std::endl;

24577 correct out of 30000 (81.9233).
