<a href="https://colab.research.google.com/github/paridhimaheshwari/NLP_ADS/blob/main/cuml_linear_regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Print the version of the `python` executable available to the runtime's shell.
!python --version

Python 3.10.12


In [2]:
# Print the Linux distribution details (distributor, release, codename) via `lsb_release -a`.
!lsb_release -a

No LSB modules are available.
Distributor ID:	Ubuntu
Description:	Ubuntu 22.04.3 LTS
Release:	22.04
Codename:	jammy


In [3]:
# Print the CUDA compiler (nvcc) version and, if that succeeds, the path of the nvcc binary.
# Note: this reports the CUDA toolkit release only; it does not print a cuDNN version.
!nvcc -V && which nvcc

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2023 NVIDIA Corporation
Built on Tue_Aug_15_22:02:13_PDT_2023
Cuda compilation tools, release 12.2, V12.2.140
Build cuda_12.2.r12.2/compiler.33191640_0
/usr/local/cuda/bin/nvcc


In [4]:
# Show the attached GPU, driver/CUDA versions and current memory usage as reported by nvidia-smi.
!nvidia-smi

Mon Jan 29 08:46:46 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   50C    P8               9W /  70W |      0MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [11]:
# This get the RAPIDS-Colab install files and test check your GPU.  Run this and the next cell only.
# Please read the output of this cell.  If your Colab Instance is not RAPIDS compatible, it will warn you and give you remediation steps.
!git clone https://github.com/rapidsai/rapidsai-csp-utils.git
!python rapidsai-csp-utils/colab/pip-install.py

fatal: destination path 'rapidsai-csp-utils' already exists and is not an empty directory.
***********************************************************************
We will now install RAPIDS via pip! 
Please stand by, should be quick...
***********************************************************************

Looking in indexes: https://pypi.org/simple, https://pypi.nvidia.com
Collecting cupy-cuda12x>=12.0.0 (from cudf-cu12==23.12.*)
  Using cached cupy_cuda12x-13.0.0-cp310-cp310-manylinux2014_x86_64.whl (88.3 MB)
Installing collected packages: cupy-cuda12x
Successfully installed cupy-cuda12x-13.0.0
Collecting cupy-cuda12x
  Using cached cupy_cuda12x-13.0.0-cp310-cp310-manylinux2014_x86_64.whl (88.3 MB)
Installing collected packages: cupy-cuda12x
Successfully installed cupy-cuda12x-13.0.0

        ***********************************************************************
        The pip install of RAPIDS is complete.
        
        Please do not run any further installation from the conda

In [8]:
# Critical imports
import cudf
import cuml
import os
import numpy as np
import pandas as pd


In [9]:
from cuml.datasets import make_regression
from cuml.model_selection import train_test_split
from cuml.linear_model import LinearRegression as cuLR
from sklearn.metrics import r2_score

In [12]:
# Size of the synthetic regression problem.
n_samples = 1024  # 2**10
n_features = 100
n_info = 70  # forwarded to make_regression as n_informative

# Generate the feature array and the target array with a fixed random_state.
X_reg, y_reg = make_regression(
    n_samples=n_samples,
    n_features=n_features,
    n_informative=n_info,
    random_state=123,
)

In [13]:
# Display the feature array returned by make_regression (the cell output is its repr).
X_reg

array([[ 1.3027127 ,  0.31813797, -0.5897687 , ...,  2.6206713 ,
         0.18871377, -0.736059  ],
       [-1.46635   , -1.0274912 ,  0.3325379 , ...,  0.97135264,
        -0.6682854 ,  0.90716773],
       [-0.8335923 , -0.7218579 , -0.0048192 , ...,  0.1417525 ,
        -0.74740344,  1.0657316 ],
       ...,
       [-1.588755  , -1.3263025 ,  0.9382942 , ...,  1.0071284 ,
        -0.8321547 , -0.65272844],
       [-0.41182882, -1.9691179 ,  0.84088457, ..., -0.29357943,
         1.5501742 , -0.19898416],
       [-1.1126544 ,  1.3856049 , -1.4245708 , ...,  0.13036194,
        -0.29343295,  1.1867118 ]], dtype=float32)

In [14]:
# Split features and targets, requesting 80% of the samples for training (train_size = 0.8);
# random_state is fixed at 10.
(X_reg_train, X_reg_test,
 y_reg_train, y_reg_test) = train_test_split(
    X_reg,
    y_reg,
    train_size=0.8,
    random_state=10,
)


In [15]:
# Build the cuML linear regression estimator (imported as cuLR) with an intercept,
# normalize switched on, and the 'eig' algorithm.
# NOTE(review): the recorded output of this cell appears to be the tail of a warning whose
# message was not captured -- re-run to see what it says and which argument triggers it.
cuml_reg_model = cuLR(
    fit_intercept=True,
    normalize=True,
    algorithm='eig',
)

  return init_func(self, *args, **filtered_kwargs)


In [16]:
# Fit on the training split and keep whatever fit() returns as the trained model
# (presumably the estimator itself -- confirm against the cuML docs).
trained_LR = cuml_reg_model.fit(X_reg_train, y_reg_train)

# Predict targets for the held-out split.
cu_preds = trained_LR.predict(X_reg_test)

In [17]:
# Names needed for scoring here and for model persistence in the following cells.
import cuml
from cupy import asnumpy
from joblib import dump, load

# R^2 of the predictions according to cuML's own metric.
cu_r2 = cuml.metrics.r2_score(y_reg_test, cu_preds)

# Same score from scikit-learn's r2_score; both arrays go through cupy's asnumpy first.
sk_r2 = r2_score(
    asnumpy(y_reg_test),
    asnumpy(cu_preds),
)

In [18]:
# Report both R^2 values side by side.
print("cuml's r2 score : ", cu_r2)
print("sklearn's r2 score : ", sk_r2)

# Persist the fitted model to 'LR.model' with joblib. As the last expression of the cell,
# dump's return value (the list of written file names) is what gets displayed.
dump(trained_LR, 'LR.model')

cuml's r2 score :  1.0
sklearn's r2 score :  0.9999999999992049


['LR.model']

In [19]:
# To reload the saved model, uncomment the line below (joblib.load unpickles the file, so only load files you trust).
# loaded_model = load('LR.model')