# Linear Regression
LinearRegression is a simple machine learning model where the response y is modelled by a linear combination of the predictors in X.

For information on converting your dataset to cuDF format, refer to the [cuDF documentation](https://rapidsai.github.io/projects/cudf/en/latest/).

For additional information on cuML's linear regression, see: https://rapidsai.github.io/projects/cuml/en/latest/api.html#linear-regression

In [None]:
import os
import numpy as np

import pandas as pd
import cudf as gd

from sklearn.model_selection import train_test_split

from sklearn.datasets import make_regression
from sklearn.metrics import mean_squared_error

from cuml.linear_model import LinearRegression as cuLR
from sklearn.linear_model import LinearRegression as skLR

## Define Parameters

In [None]:
# Size of the synthetic regression problem used by every cell below.
n_samples = 1 << 20  # 2**20 = 1,048,576 rows
n_features = 399     # number of predictor columns

## Generate Data

### Host

In [None]:
%%time
X,y = make_regression(n_samples=n_samples, n_features=n_features, random_state=0)

X = pd.DataFrame(X)
y = pd.Series(y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state=0)

### GPU

In [None]:
%%time
X_cudf = gd.DataFrame.from_pandas(X_train)
X_cudf_test = gd.DataFrame.from_pandas(X_test)

y_cudf = gd.Series(y_train.values)

## Scikit-learn Model

### Fit

In [None]:
%%time
ols_sk = skLR(fit_intercept=True,
              normalize=True,
              n_jobs=-1)

ols_sk.fit(X_train, y_train)

### Evaluate

In [None]:
%%time
predict_sk = ols_sk.predict(X_test)

error_sk = mean_squared_error(y_test, predict_sk)

## cuML Model

### Fit

In [None]:
%%time
ols_cuml = cuLR(fit_intercept=True,
                normalize=True,
                algorithm='eig')

ols_cuml.fit(X_cudf, y_cudf)

### Evaluate

In [None]:
%%time
predict_cuml = ols_cuml.predict(X_cudf_test).to_array()

error_cuml = mean_squared_error(y_test, predict_cuml)

## Compare Results

In [None]:
# Report both errors side by side; `!s` applies str() exactly as `%s` did.
print(f"SKL MSE(y): {error_sk!s}")
print(f"CUML MSE(y): {error_cuml!s}")