In [1]:
// Import necessary library headers

#include <mlpack/core.hpp>
#include <mlpack/core/data/split_data.hpp>
#include <mlpack/methods/linear_regression/linear_regression.hpp>
#include <cmath>

In [2]:
#define WITHOUT_NUMPY 1
#include "matplotlibcpp.h"
#include "xwidgets/ximage.hpp"

In [3]:
// Bring the mlpack namespaces into scope for the rest of the notebook and
// give the plotting library a short alias.
// NOTE(review): the `mlpack::regression` sub-namespace only exists in some
// mlpack releases — confirm it matches the installed version.
using namespace mlpack;
using namespace mlpack::regression;
namespace plt = matplotlibcpp;

In [4]:
// Load the dataset into an Armadillo matrix.
// The third argument (fatal = true) makes a missing or unreadable file raise
// an error in this cell, instead of returning false and leaving `inputs`
// empty for every cell that follows.

arma::mat inputs;
data::Load("Salary_Data.csv", inputs, true);

In [5]:
// Drop the first column of the loaded matrix, which holds the CSV header line.
// NOTE(review): this relies on mlpack loading the file transposed (one column
// per CSV line), so the header ends up as column 0 — confirm for the mlpack
// version in use.

inputs.shed_col(0);

In [6]:
// Display the first 6 samples of the input data (columns 0 through 5 of the
// loaded matrix), transposed so that each printed line is one sample.

std::cout<<std::setw(18)<<"Years Of Experience"<<std::setw(10)<<"Salary"<<std::endl;
std::cout<<inputs.submat(0, 0, inputs.n_rows-1, 5).t()<<std::endl;

Years Of Experience    Salary
   1.1000e+00   3.9343e+04
   1.3000e+00   4.6205e+04
   1.5000e+00   3.7731e+04
   2.0000e+00   4.3525e+04
   2.2000e+00   3.9891e+04
   2.9000e+00   5.6642e+04



In [None]:
// Plot the input data

// Row 0 is used as the x axis (years of experience) and row 1 as the y axis
// (salary); each column of `inputs` is one sample.
std::vector<double> x = arma::conv_to<std::vector<double>>::from(inputs.row(0));
std::vector<double> y = arma::conv_to<std::vector<double>>::from(inputs.row(1));

plt::scatter(x, y, 12, {{"color", "coral"}});
plt::xlabel("Years of Experience");
plt::ylabel("Salary in $");
plt::title("Experience vs. Salary");

// The image widget must read back the same file the figure was saved to.
plt::save("./scatter.png");
auto img = xw::image_from_file("scatter.png").finalize();
img

In [7]:
// Split the data into features (X) and target (y) variables.
// The targets are the last row of `inputs`; they are copied out here before
// that row is removed from the feature matrix.
// NOTE(review): the targets are converted to an unsigned integer row, so any
// fractional part of a target value cannot be kept — confirm this is
// acceptable for a regression target.

arma::Row<size_t> targets = arma::conv_to<arma::Row<size_t>>::from(inputs.row(inputs.n_rows - 1));

In [8]:
// Remove the last row (the targets) from the loaded data so that `inputs`
// contains only the features from here on.

inputs.shed_row(inputs.n_rows - 1);

In [9]:
// Split the dataset into train and test sets using mlpack.
// The last argument (0.4) is the test ratio passed to data::Split, i.e. the
// fraction of samples placed in Xtest / Ytest; the rest go to Xtrain / Ytrain.
// NOTE(review): no random seed is set in this notebook, so the split may
// differ from run to run — confirm whether reproducibility is needed.

arma::mat Xtrain;
arma::mat Xtest;
arma::Row<size_t> Ytrain;
arma::Row<size_t> Ytest;
data::Split(inputs, targets, Xtrain, Xtest, Ytrain, Ytest, 0.4);

In [None]:
// Convert the unsigned-integer target rows into arma::rowvec (double
// precision), which is the response type passed to LinearRegression below and
// used for the error metrics at the end.

arma::rowvec y_train = arma::conv_to<arma::rowvec>::from(Ytrain);
arma::rowvec y_test = arma::conv_to<arma::rowvec>::from(Ytest);

In [None]:
// Create and train the linear regression model; the constructor fits the
// model on the training features and responses.
// NOTE(review): the third argument (0.5) is the regularization parameter
// (lambda) of mlpack's LinearRegression — confirm that a regularized fit is
// intended rather than plain least squares (lambda = 0).

regression::LinearRegression lr(Xtrain, y_train, 0.5);

In [None]:
// Make predictions for the test data points.
// Predict() writes its results into `y_preds`, which is reused below for
// plotting and for the error metrics.

arma::rowvec y_preds;
lr.Predict(Xtest, y_preds);

In [None]:
// Convert the Armadillo vectors and matrices to std::vector<double> for
// plotting purposes.
// NOTE(review): converting the whole of Xtest assumes it is a single row (one
// feature); with more feature rows a specific row would have to be selected.

std::vector<double> x_test = arma::conv_to<std::vector<double>>::from(Xtest);
std::vector<double> y_t = arma::conv_to<std::vector<double>>::from(y_test);
std::vector<double> y_p = arma::conv_to<std::vector<double>>::from(y_preds);

In [None]:
// Visualize predicted data points: actual test targets as a scatter plot and
// the model's predictions as a line over the same x values.

// Clear the current figure first so that anything drawn by an earlier plotting
// cell does not show up underneath this plot.
plt::clf();
plt::scatter(x_test, y_t, 12, {{"color", "coral"}});
plt::plot(x_test,y_p);
plt::xlabel("Years of Experience");
plt::ylabel("Salary in $");
plt::title("Predicted Experience vs. Salary");

// The image widget must read back the same file the figure was saved to.
plt::save("./scatter1.png");
auto img = xw::image_from_file("scatter1.png").finalize();
img

In [None]:
// Model evaluation metrics

std::cout<<"Mean Absolute Error: "<<arma::mean(arma::abs(y_preds - y_test))<<std::endl;
std::cout<<"Mean Squared Error: "<<arma::mean(arma::pow(y_preds - y_test,2))<<std::endl;
std::cout<<"Root Mean Squared Error: "<<sqrt(arma::mean(arma::pow(y_preds - y_test,2)))<<std::endl;