# Stochastic gradient descent (SGD) 

Stochastic gradient descent is an iterative algorithm that optimizes an objective function by using samples from the dataset.

The cuML implementation can take array-like objects, either on the host (as NumPy arrays) or on the device (as Numba arrays or other `__cuda_array_interface__`-compliant objects), as well as cuDF DataFrames.

For information about cuDF, refer to the [cuDF documentation](https://rapidsai.github.io/projects/cudf/en/latest/).

For information about cuML's SGD implementation, refer to the [cuML documentation](https://rapidsai.github.io/projects/cuml/en/latest/index.html)

In [None]:
import os

import cudf as gd
import numpy as np
import pandas as pd
from cuml.linear_model import MBSGDRegressor as cumlSGD
from sklearn.datasets import make_regression
from sklearn.linear_model import SGDRegressor as skSGD
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

## Define Parameters

In [None]:
# Problem size: rows and columns of the synthetic regression data.
n_features = 399
n_samples = 1 << 20  # 1,048,576 samples

# NumPy dtype chosen for the benchmark data.
datatype = np.float32

# Optimizer settings shared by the models configured further down.
loss = 'squared_loss'
penalty = 'elasticnet'
learning_rate = 'adaptive'
max_iter = 10  # handed to the scikit-learn estimator as `max_iter`

## Generate Data

In [None]:
%%time
# Build a synthetic regression problem; the fixed random_state makes the
# generated data reproducible from run to run.
X, y = make_regression(n_samples=n_samples, n_features=n_features, random_state=0)

# Cast to the configured dtype and wrap in pandas objects: the cuDF
# conversion further down calls DataFrame.from_pandas, which needs a pandas
# DataFrame rather than a raw NumPy array.
X = pd.DataFrame(X.astype(datatype))
y = pd.Series(y.astype(datatype))

# Hold out 20% of the rows for evaluation.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

## Scikit-learn Model

### Fit 

In [None]:
%%time
# Configure scikit-learn's SGD regressor through the `skSGD` alias from the
# import cell; the bare name `SGDRegressor` is never imported in this notebook.
sk_sgd = skSGD(learning_rate=learning_rate,
               eta0=0.07,
               max_iter=max_iter,
               tol=0.0,
               fit_intercept=True,
               penalty=penalty,
               loss=loss)

# Train on the split produced by train_test_split (`y_train` is the name that
# cell defines).
sk_sgd.fit(X_train, y_train)

### Predict

In [None]:
%%time
# Score the scikit-learn model on the held-out rows.
y_sk = sk_sgd.predict(X_test)
error_sk = mean_squared_error(y_true=y_test, y_pred=y_sk)

## cuML Model

In [None]:
%%time
# Move the training and test features onto the GPU as cuDF DataFrames.
# Wrapping in pd.DataFrame keeps the conversion working whether the split
# produced NumPy arrays or pandas DataFrames; from_pandas needs the latter.
X_cudf = gd.DataFrame.from_pandas(pd.DataFrame(X_train))
X_cudf_test = gd.DataFrame.from_pandas(pd.DataFrame(X_test))

# Training targets as a cuDF Series; `y_train` is the name defined by the
# train_test_split call.
y_cudf = gd.Series(y_train)

### Fit

In [None]:
%%time
# Configure cuML's mini-batch SGD regressor with the same hyperparameters as
# the scikit-learn model. The number of epochs comes from `max_iter`, the
# iteration count defined in the parameters cell.
cu_sgd = cumlSGD(learning_rate=learning_rate,
                 eta0=0.07,
                 epochs=max_iter,
                 batch_size=512,
                 tol=0.0,
                 penalty=penalty,
                 loss=loss)

# Train on the cuDF copies of the training data.
cu_sgd.fit(X_cudf, y_cudf)

### Predict

In [None]:
%%time
# Predict on the held-out rows and flatten the result into a 1-D array so it
# can be handed to scikit-learn's metric.
y_pred = (
    cu_sgd.predict(X_cudf_test)
    .to_array()
    .ravel()
)
error_cu = mean_squared_error(y_true=y_test, y_pred=y_pred)

## Evaluate Results

In [None]:
# Report both mean squared errors, one item per line.
print("SKL MSE(y):", error_sk, "CUML MSE(y):", error_cu, sep="\n")