In [1]:
import cudf as pd
from sklearn.datasets import load_boston
boston = load_boston()
df = pd.DataFrame(data=boston.data, columns = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD',
        'TAX', 'PTRATIO', 'B', 'LSTAT'])
df['target'] = boston.target
df.head(5)

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,target
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2


In [2]:
import cupy as np
X=df.iloc[:,0:13]
Y=df.iloc[:,13]
Y=np.array(Y)
Y=Y.reshape(-1,1)

In [3]:
from sklearn.preprocessing import StandardScaler
scaler=StandardScaler()
X_scaled=scaler.fit_transform(X.as_matrix())

In [4]:
X_scaled

array([[-0.41978194,  0.28482986, -1.2879095 , ..., -1.45900038,
         0.44105193, -1.0755623 ],
       [-0.41733926, -0.48772236, -0.59338101, ..., -0.30309415,
         0.44105193, -0.49243937],
       [-0.41734159, -0.48772236, -0.59338101, ..., -0.30309415,
         0.39642699, -1.2087274 ],
       ...,
       [-0.41344658, -0.48772236,  0.11573841, ...,  1.17646583,
         0.44105193, -0.98304761],
       [-0.40776407, -0.48772236,  0.11573841, ...,  1.17646583,
         0.4032249 , -0.86530163],
       [-0.41500016, -0.48772236,  0.11573841, ...,  1.17646583,
         0.44105193, -0.66905833]])

In [5]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, Y, test_size=0.3, random_state=42)

In [6]:
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

(354, 13)
(152, 13)
(354, 1)
(152, 1)


In [7]:
import cuml
from cuml import LinearRegression

In [8]:
lr = LinearRegression(fit_intercept = True, normalize = False, algorithm = 'eig')
reg = lr.fit(X_train,y_train)
print("Coefficients:")
print(reg.coef_)
print("Intercept:")
print(reg.intercept_)
preds = lr.predict(X_test)
print("Predictions:")
print(preds)
print("MSE:")
print(cuml.metrics.regression.mean_squared_error(y_test,preds))
print("R2 Score:")
print(cuml.metrics.regression.r2_score(y_test,preds))
print("MAE:")
print(cuml.metrics.regression.mean_absolute_error(y_test,preds))

Coefficients:
[-1.14691411  0.83432605  0.33940694  0.79163612 -1.784727    2.84783949
 -0.30429306 -2.91562521  2.11140045 -1.46519951 -1.9696347   1.07567771
 -3.90310727]
Intercept:
22.50945471291038
Predictions:
[28.64896005 36.49501384 15.4111932  25.40321303 18.85527988 23.14668944
 17.3921241  14.07859899 23.03692679 20.59943345 24.82286159 18.53057049
 -6.86543527 21.80172334 19.22571177 26.19191985 20.27733882  5.61596432
 40.44887974 17.57695918 27.44319095 30.1715964  10.94055823 24.02083139
 18.07693812 15.934748   23.12614028 14.56052142 22.33482544 19.3257627
 22.16564973 25.19476081 25.31372473 18.51345025 16.6223286  17.50268505
 30.94992991 20.19201752 23.90440431 24.86975466 13.93767876 31.82504715
 42.56978796 17.62323805 27.01963242 17.19006621 13.80594006 26.10356557
 20.31516118 30.08649576 21.3124053  34.15739602 15.60444981 26.11247588
 39.31613646 22.99282065 18.95764781 33.05555669 24.85114223 12.91729352
 22.68101452 30.80336295 31.63522027 16.29833689 21.073

In [9]:
algorithm = ['svd', 'eig', 'qr', 'svd-qr', 'svd-jacobi']

In [10]:
for i in algorithm:
    print("Algorithm:")
    print(i)
    lr = LinearRegression(fit_intercept = True, normalize = False, algorithm = i)
    reg = lr.fit(X_train,y_train)
    preds = lr.predict(X_test)
    print("MSE:")
    print(cuml.metrics.regression.mean_squared_error(y_test,preds))
    print("R2 Score:")
    print(cuml.metrics.regression.r2_score(y_test,preds))
    print("MAE:")
    print(cuml.metrics.regression.mean_absolute_error(y_test,preds))

Algorithm:
svd
MSE:
139.59932404504897
R2 Score:
0.7112260057484925
MAE:
8.983415627235024
Algorithm:
eig
MSE:
139.59932404504923
R2 Score:
0.711226005748492
MAE:
8.983415627235035
Algorithm:
qr
MSE:
139.59932404504897
R2 Score:
0.7112260057484925
MAE:
8.983415627235024
Algorithm:
svd-qr
MSE:
77.29709884147577
R2 Score:
-0.06387682173517528
MAE:
6.538584813856789
Algorithm:
svd-jacobi
MSE:
77.29709884147577
R2 Score:
-0.06387682173517528
MAE:
6.538584813856789
