In [None]:
import numpy as np
import cupy as cp

import pandas as pd
import cudf as cd

import numba
import numba.cuda

from cuml import Ridge as cumlRidge

from sklearn import datasets, linear_model
from sklearn.model_selection import train_test_split, GridSearchCV

In [None]:
# Configure cuDF's device-memory allocator with pooling switched on (pool=True).
# NOTE(review): presumably this is meant to cut per-allocation overhead for the
# GPU cells that follow — confirm against the installed cuDF version's docs.
cd.set_allocator(pool=True)

In [None]:
# Load the scikit-learn diabetes regression dataset.
diabetes = datasets.load_diabetes()

# Hold out 20% of the rows for testing. The split is seeded so that a
# Restart & Run All reproduces the same train/test partition every time.
SPLIT_SEED = 42
X_train, X_test, y_train, y_test = train_test_split(
    diabetes.data, diabetes.target, test_size=0.2, random_state=SPLIT_SEED)

# Repeat the training rows dupN times to build a benchmark-sized dataset.
# np.tile allocates the repeated array in one step, which avoids building a
# dupN-element list of references and then copying the full-size result again.
dupN = int(1e5)
X_train_dup = np.tile(X_train, (dupN, 1))
y_train_dup = np.tile(y_train, dupN)

# Ensure data is Fortran ordered
X_train_dup, X_test, y_train_dup, y_test = map(
    np.asfortranarray, [X_train_dup, X_test, y_train_dup, y_test])

# Move to GPU
cu_X_train_dup, cu_X_test, cu_y_train_dup, cu_y_test = map(
    cp.asarray, [X_train_dup, X_test, y_train_dup, y_test])
# Wait for all outstanding work on the current device. Synchronizing a freshly
# constructed Stream() would not wait for anything, because no work was ever
# queued on that new stream.
cp.cuda.Device().synchronize()

# Create dataframes from the device arrays: one column per feature, plus a
# single-column frame for the target. Both are built from dicts of GPU arrays
# so the target does not take a second host-to-device copy.
gdf_X_train_dup = cd.DataFrame(
    {"fea%d" % i: cu_X_train_dup[:, i] for i in range(cu_X_train_dup.shape[1])})
gdf_y_train_dup = cd.DataFrame({"train": cu_y_train_dup})

In [None]:
# Ridge settings passed to both estimator constructors.
alpha = np.array([1.0])
normalize = False
fit_intercept = True

# Search grid: ten alpha values spaced logarithmically from 1e-3 to 1e-1.
params = dict(alpha=np.logspace(-3, -1, num=10))

In [None]:
# Construct the scikit-learn and cuML Ridge estimators from the same
# alpha / fit_intercept / normalize settings; only the solver differs.
clf = linear_model.Ridge(
    solver='cholesky',
    alpha=alpha,
    fit_intercept=fit_intercept,
    normalize=normalize,
)
cu_clf = cumlRidge(
    solver="eig",
    alpha=alpha,
    fit_intercept=fit_intercept,
    normalize=normalize,
)

In [None]:
%%time
sk_grid = GridSearchCV(clf, params, cv=5, iid=False, n_jobs=-1)
sk_grid.fit(X_train_dup, y_train_dup)

In [None]:
%%time
cu_sk_grid = GridSearchCV(cu_clf, params, cv=5, iid=False)
cu_sk_grid.fit(gdf_X_train_dup, gdf_y_train_dup)