[![Binder](https://mybinder.org/badge_logo.svg)](https://lab.mlpack.org/v2/gh/mlpack/examples/master?urlpath=lab%2Ftree%2Fforest_covertype_prediction_with_random_forests%2Fcovertype-rf-cpp.ipynb)

In [1]:
/**
 * @file covertype-rf-cpp.ipynb
 *
 * Classification using Random Forest on the Covertype dataset.
 */

In [2]:
!wget -O - https://datasets.mlpack.org/covertype-small.csv.gz | gunzip -c > covertype-small.csv

In [3]:
#include <mlpack/xeus-cling.hpp>
#include <mlpack.hpp>

In [4]:
// Pull mlpack's names into scope so that later cells can write data::Load(),
// data::Split() and RandomForest<> without the mlpack:: prefix.
using namespace mlpack;

In [5]:
// Load the dataset that we downloaded.
// The third argument (`fatal`) is set to true so that a missing or unreadable
// file raises an error right here, instead of silently leaving `dataset`
// empty and failing later when the label row is extracted from it.
arma::mat dataset;
data::Load("covertype-small.csv", dataset, true /* fatal on failure */);

In [6]:
// Labels are the last row.
// The dataset stores labels from 1 through 7, but we need 0 through 6
// (in mlpack labels are zero-indexed), so we subtract 1.
arma::Row<size_t> labels =
    arma::conv_to<arma::Row<size_t>>::from(dataset.row(dataset.n_rows - 1)) - 1;
dataset.shed_row(dataset.n_rows - 1);

In [7]:
// Containers that receive the two partitions of the data and their labels.
arma::mat trainSet;
arma::mat testSet;
arma::Row<size_t> trainLabels;
arma::Row<size_t> testLabels;

// Randomly partition the points: a 0.3 fraction of the dataset is held out as
// the test set and the remainder is used for training.
data::Split(dataset, labels,
            trainSet, testSet,
            trainLabels, testLabels,
            0.3);

In [8]:
// Train a random forest on the training split.
// Arguments after the data and labels: 7 classes in the dataset, 10 trees.
RandomForest<> rf(trainSet, trainLabels, 7, 10);

In [9]:
// Predict the labels of the test points.
// `output` is handed to Classify() to receive the predictions; it is compared
// element-wise against `testLabels` in the next cell to measure accuracy.
arma::Row<size_t> output;
rf.Classify(testSet, output);

In [10]:
// Report the accuracy on the test set: count the predictions that match the
// true labels, then print that count, the test-set size and the percentage.
// NOTE(review): this notebook only computes hard label predictions; an ROC
// curve would additionally need per-class probabilities from the classifier
// (presumably via another Classify() overload -- confirm against mlpack docs).
const size_t correct = arma::accu(output == testLabels);
std::cout << correct << " correct out of " << testLabels.n_elem
          << " (" << 100.0 * correct / testLabels.n_elem << "%)."
          << std::endl;

24747 correct out of 30000 (82.49%).
