## Predicting Salary using Linear Regression

### Objective
* We have to predict the salary of an employee given how many years of experience they have.

### Dataset
* Salary_Data.csv has 2 columns — “Years of Experience” and “Salary” — for 30 employees in a company.

### Approach
* In this example, we will train a Linear Regression model to learn the relationship between each employee's years of experience and their salary.
* Once the model is trained, we will be able to do some sample predictions.

In [None]:
// Import necessary library headers

#include <mlpack/core.hpp>
#include <mlpack/core/data/split_data.hpp>
#include <mlpack/methods/linear_regression/linear_regression.hpp>
#include <cmath>

In [None]:
#define WITHOUT_NUMPY 1
#include "matplotlibcpp.h"
#include "xwidgets/ximage.hpp"

namespace plt = matplotlibcpp;

In [None]:
using namespace mlpack;
using namespace mlpack::regression;

In [None]:
// Load the dataset into an Armadillo matrix.
// The third argument (fatal = true) makes data::Load throw if the file cannot
// be read or parsed. With the default (false) a failure is only reported through
// the boolean return value, which was ignored here, so the problem would only
// surface later as a confusing error in one of the following cells.

arma::mat inputs;
data::Load("Salary_Data.csv", inputs, true);

In [None]:
// Remove column 0 of the loaded matrix, which holds the CSV header line.
// NOTE(review): the data is used below with one observation per column
// (row 0 = years of experience, row 1 = salary), so the header line of the
// file presumably ends up as the first column rather than the first row.

inputs.shed_cols(0, 0);

In [None]:
// Display the first 5 observations of the input data.
// Observations are stored one per column, so take exactly the first 5 columns
// and transpose them so that each observation prints as one row under the
// headings. (An inclusive column range of 0..5 would print 6 observations.)

std::cout << std::setw(18) << "Years Of Experience" << std::setw(10) << "Salary" << std::endl;
std::cout << inputs.head_cols(5).t() << std::endl;

In [None]:
// Plot the input data: years of experience (row 0) against salary (row 1).

std::vector<double> x = arma::conv_to<std::vector<double>>::from(inputs.row(0));
std::vector<double> y = arma::conv_to<std::vector<double>>::from(inputs.row(1));

plt::scatter(x, y, 12, {{"color", "coral"}});
plt::xlabel("Years of Experience");
plt::ylabel("Salary in $");
plt::title("Experience vs. Salary");

// Save the figure and display that same file: the path given to
// image_from_file must match the path given to save.
plt::save("./scatter1.png");
auto img = xw::image_from_file("./scatter1.png").finalize();
img

In [None]:
// Split the data into features (X) and target (y) variables
// targets are the last row

arma::Row<size_t> targets = arma::conv_to<arma::Row<size_t>>::from(inputs.row(inputs.n_rows - 1));

In [None]:
// Labels are dropped from the originally loaded data to be used as features

inputs.shed_row(inputs.n_rows - 1);

In [None]:
// Partition the features and targets into training and test sets with
// mlpack's data::Split. The final argument (0.4) is the test ratio handed
// to Split; the four output containers are filled by the call.

arma::mat Xtrain, Xtest;
arma::Row<size_t> Ytrain, Ytest;
data::Split(inputs, targets, Xtrain, Xtest, Ytrain, Ytest, 0.4);

In [None]:
// Convert the size_t label rows into arma::rowvec, the form in which the
// responses are handed to mlpack's LinearRegression in this notebook.

arma::rowvec y_train(arma::conv_to<arma::rowvec>::from(Ytrain));
arma::rowvec y_test(arma::conv_to<arma::rowvec>::from(Ytest));

## Linear Model

In [None]:
// Create and Train Linear Regression model

regression::LinearRegression lr(Xtrain, y_train, 0.5);

In [None]:
// Make predictions for the test data points.
// Xtest is passed to the trained model `lr`, and Predict writes its output
// into y_preds, which the later plotting and metric cells read.

arma::rowvec y_preds;
lr.Predict(Xtest, y_preds);

In [None]:
// Copy the Armadillo test features, test targets and predictions into
// std::vector<double> containers, the form passed to the plotting calls.

auto x_test = arma::conv_to<std::vector<double>>::from(Xtest);
auto y_t = arma::conv_to<std::vector<double>>::from(y_test);
auto y_p = arma::conv_to<std::vector<double>>::from(y_preds);

In [None]:
// Visualize the test points (scatter) together with the model's predictions (line).

// Start from an empty figure so that nothing drawn by an earlier plotting
// cell is carried over into this image.
plt::clf();
plt::scatter(x_test, y_t, 12, {{"color", "coral"}});
plt::plot(x_test, y_p);
plt::xlabel("Years of Experience");
plt::ylabel("Salary in $");
plt::title("Predicted Experience vs. Salary");

// Save the figure and display that same file: the path given to
// image_from_file must match the path given to save.
plt::save("./scatter1.png");
auto img = xw::image_from_file("./scatter1.png").finalize();
img

In [None]:
// Model evaluation metrics

std::cout << "Mean Absolute Error: " << arma::mean(arma::abs(y_preds - y_test)) << std::endl;
std::cout << "Mean Squared Error: " << arma::mean(arma::pow(y_preds - y_test,2)) << std::endl;
std::cout << "Root Mean Squared Error: " << sqrt(arma::mean(arma::pow(y_preds - y_test,2))) << std::endl;