In [None]:
import sys

if "google.colab" in sys.modules:
    !git clone https://github.com/rapidsai/rapidsai-csp-utils.git
    !python rapidsai-csp-utils/colab/pip-install.py

Cloning into 'rapidsai-csp-utils'...
remote: Enumerating objects: 603, done.
remote: Counting objects: 100% (169/169), done.
remote: Compressing objects: 100% (87/87), done.
remote: Total 603 (delta 131), reused 82 (delta 82), pack-reused 434 (from 3)
Receiving objects: 100% (603/603), 199.38 KiB | 11.08 MiB/s, done.
Resolving deltas: 100% (305/305), done.
Installing RAPIDS remaining 25.08 libraries
Using Python 3.12.12 environment at: /usr
Resolved 180 packages in 1.50s
Prepared 41 packages in 41.84s
Uninstalled 31 packages in 936ms
Installed 41 packages in 576ms
 - bokeh==3.7.3
 + bokeh==3.6.3
 + cucim-cu12==25.8.0
 + cuda-bindings==12.9.4
 + cuda-pathfinder==1.3.1
 - cuda-python==12.6.2.post1
 + cuda-python==12.9.4
 - cudf-cu12==25.6.0 (from https://pypi.nvidia.com/cudf-cu12/cudf_cu12-25.6.0-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl)
 + cudf-cu12==25.8.0
 + cugraph-cu12==25.8.0
 - cuml-cu12==25.6.0
 + cuml-cu12==25.8.0
 - cuvs-cu12==25.6.1
 + cuvs-cu12=

# 데이터 불러오기

In [None]:
from tensorflow.keras.datasets import mnist

# Fetch MNIST through Keras (the archive is downloaded on first use) and unpack
# it into train / test image and label arrays.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Report the image-array and label-array shapes of each split.
for _caption, _images, _labels in (
    ('학습 데이터셋 크기:', X_train, y_train),
    ('테스트 데이터셋 크기:', X_test, y_test),
):
    print(_caption, _images.shape, _labels.shape)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
11490434/11490434 ━━━━━━━━━━━━━━━━━━━━ 0s 0us/step
학습 데이터셋 크기: (60000, 28, 28) (60000,)
테스트 데이터셋 크기: (10000, 28, 28) (10000,)


# scikit-learn k-NN Classifier

In [None]:
%%time

from tensorflow.keras.datasets import mnist

from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score


# MNIST 데이터셋 불러오기
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# 데이터를 1차원 벡터로 변환 (각 이미지가 28x28에서 784로 변환됨)
X_train = X_train.reshape(X_train.shape[0], -1)  # 크기 (60,000, 784)
X_test = X_test.reshape(X_test.shape[0], -1)  # 크기 (10,000, 784)

# scikit-learn k-NN 모델 초기화 (n_neighbors 기본값은 5)
knn_model = KNeighborsClassifier(n_neighbors=3)

# 모델 학습
knn_model.fit(X_train, y_train)

# 테스트 데이터 예측
y_pred = knn_model.predict(X_test)

# 정확도 측정
sklearn_accuracy = accuracy_score(y_test, y_pred)

# 정확도 출력
print('scikit-learn k-NN Classifier 정확도:', sklearn_accuracy)

scikit-learn k-NN Classifier 정확도: 0.9705
CPU times: user 50 s, sys: 4.67 s, total: 54.7 s
Wall time: 34.6 s


# cuML k-NN Classifier

In [None]:
%%time
from tensorflow.keras.datasets import mnist

import cudf
import cuml
from cuml.neighbors import KNeighborsClassifier
from cuml.metrics import accuracy_score


# MNIST 데이터셋 불러오기
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# 데이터를 1차원 벡터로 변환 (각 이미지가 28x28에서 784로 변환됨)
X_train = X_train.reshape(X_train.shape[0], -1) # (60,000, 784)
X_test = X_test.reshape(X_test.shape[0], -1) # (10,000, 784)

# cuDF DataFrame으로 변환하여 GPU 메모리에 올리기
X_train_cudf = cudf.DataFrame.from_records(X_train)
X_test_cudf = cudf.DataFrame.from_records(X_test)
y_train_cudf = cudf.Series(y_train)
y_test_cudf = cudf.Series(y_test)

# cuML k-NN 모델 초기화 (n_neighbors 기본값은 5)
knn_model = KNeighborsClassifier(n_neighbors=3)

# 모델 학습
knn_model.fit(X_train_cudf, y_train_cudf)

# 테스트 데이터 예측
y_pred_cudf = knn_model.predict(X_test_cudf)

# 정확도 측정
cuml_accuracy = accuracy_score(y_test_cudf, y_pred_cudf)

# 정확도 출력
print('cuML k-NN Classifier 정확도:', cuml_accuracy)

cuML k-NN Classifier 정확도: 0.9705
CPU times: user 7.79 s, sys: 851 ms, total: 8.64 s
Wall time: 14.8 s
