In [65]:
import math

import pandas as pd
import numpy as np
from numba import cuda
import time
import copy


# Load the raw credit-card customer table.
df = pd.read_csv('data/CC GENERAL.csv')

# Preview the first rows and the per-column count of missing values.
print(df.head(), "======================", df.isnull().sum(), "======================", sep="\n")

# Impute the two columns handled here: median for MINIMUM_PAYMENTS,
# mean for CREDIT_LIMIT. Both statistics are taken before anything is filled.
df = df.fillna({
    'MINIMUM_PAYMENTS': df['MINIMUM_PAYMENTS'].median(),
    'CREDIT_LIMIT': df['CREDIT_LIMIT'].mean(),
})
print(df.isnull().sum(), "======================", sep="\n")

# Drop the CUST_ID column and convert the remaining columns to a NumPy array.
df = df.drop(columns='CUST_ID')
np_data = df.to_numpy()
print(np_data.shape, "======================", sep="\n")

  CUST_ID      BALANCE  BALANCE_FREQUENCY  PURCHASES  ONEOFF_PURCHASES  \
0  C10001    40.900749           0.818182      95.40              0.00   
1  C10002  3202.467416           0.909091       0.00              0.00   
2  C10003  2495.148862           1.000000     773.17            773.17   
3  C10004  1666.670542           0.636364    1499.00           1499.00   
4  C10005   817.714335           1.000000      16.00             16.00   

   INSTALLMENTS_PURCHASES  CASH_ADVANCE  PURCHASES_FREQUENCY  \
0                    95.4      0.000000             0.166667   
1                     0.0   6442.945483             0.000000   
2                     0.0      0.000000             1.000000   
3                     0.0    205.788017             0.083333   
4                     0.0      0.000000             0.083333   

   ONEOFF_PURCHASES_FREQUENCY  PURCHASES_INSTALLMENTS_FREQUENCY  \
0                    0.000000                          0.083333   
1                    0.000000       

In [66]:
# Number of clusters to fit.
k = 20
# Use k distinct rows of the data as the initial centroids.
# Sampling WITHOUT replacement guarantees no row index is drawn twice: two
# centroids starting from the very same row would leave one cluster empty
# after the first assignment step, and the mean of an empty cluster is NaN.
# The fixed seed makes the starting point (and therefore the whole run)
# repeatable after a kernel restart.
# Note: Generator.choice raises ValueError if k exceeds the number of rows.
centroid = np_data[np.random.default_rng(42).choice(np_data.shape[0], size=k, replace=False), :]

In [67]:
def next_power_of_2(x):
    """Return two raised to the bit length of ``x - 1``.

    For an integer ``x >= 1`` this is the smallest power of two that is
    greater than or equal to ``x`` (e.g. 20 -> 32, 32 -> 32, 33 -> 64).

    Args:
        x: An integer (anything exposing ``int.bit_length`` after ``x - 1``).

    Returns:
        int: ``2 ** (x - 1).bit_length()``.
    """
    exponent = (x - 1).bit_length()
    return 2 ** exponent

In [68]:
def calc_dimension_for_distance(data, data_centroid):
    """Compute a 2D CUDA launch configuration for the distance kernel.

    The x dimension of the launch covers centroids and the y dimension covers
    data rows, matching how ``calc_distance_kernel`` derives its column and
    row indices.

    The block is ``next_power_of_2(k)`` threads wide. It is square whenever a
    square block fits in the per-block thread budget (so up to k = 32 the
    result is the same as a plain ``(dim_x, dim_x)`` block); for larger k the
    block height is reduced so that ``dim_x * dim_y`` never exceeds the budget,
    and for k above the budget several blocks are used along x.

    Args:
        data: Array-like with a ``shape`` attribute; ``shape[0]`` is the
            number of data rows.
        data_centroid: Array-like with a ``shape`` attribute; ``shape[0]`` is
            the number of centroids.

    Returns:
        tuple: ``(threads_per_block, blocks_per_grid)``, each an ``(x, y)``
        tuple of ints. Grid dimensions are at least 1.
    """
    # Upper bound on threads in a single block on current CUDA hardware.
    max_threads_per_block = 1024

    n_points = data.shape[0]
    n_centroids = data_centroid.shape[0]

    dim_x = min(next_power_of_2(n_centroids), max_threads_per_block)
    # Square when it fits, otherwise as tall as the thread budget allows.
    dim_y = min(dim_x, max_threads_per_block // dim_x)
    thread_per_blocks = (dim_x, dim_y)

    # A grid dimension of zero is not a valid launch, hence the floor of 1.
    blocks_per_grid_x = max(1, math.ceil(n_centroids / dim_x))
    # Rows are spread over the y dimension, so divide by the block HEIGHT.
    blocks_per_grid_y = max(1, math.ceil(n_points / dim_y))
    blocks_per_grid = (blocks_per_grid_x, blocks_per_grid_y)
    return thread_per_blocks, blocks_per_grid

In [69]:
# Launch configuration (threads per block, blocks per grid) computed from the
# full data set and the initial centroids.
dist_tpb, dist_bpg = calc_dimension_for_distance(np_data, centroid)

In [70]:
dist_tpb

(32, 32)

In [71]:
dist_bpg

(1, 280)

In [72]:
@cuda.jit
def calc_distance_kernel(data, data_centroid, result):
    """CUDA kernel: Euclidean distance from every data row to every centroid.

    Each thread handles one (row, centroid) pair and writes
    ``result[row, centroid]``. Threads whose indices fall outside the arrays
    do nothing, so the launch grid may be larger than the problem.

    Args:
        data: 2D device array, one data point per row.
        data_centroid: 2D device array, one centroid per row; its second
            dimension sets how many features are compared.
        result: 2D device array indexed ``[row, centroid]`` that receives
            the distances.
    """
    # cuda.grid(2) returns the absolute (x, y) position of this thread:
    # x selects the centroid (column), y selects the data row.
    c, r = cuda.grid(2)
    if r < data.shape[0] and c < data_centroid.shape[0]:
        # Float accumulator so the variable has a single type throughout.
        total = 0.0
        for i in range(data_centroid.shape[1]):
            # Tuple indexing reads the element directly instead of first
            # slicing out a row; a plain multiply replaces the general pow.
            diff = data[r, i] - data_centroid[c, i]
            total += diff * diff
        result[r, c] = math.sqrt(total)

In [74]:
def calc_distance_gpu(data, data_centroid):
    """Compute the point-to-centroid distance matrix on the GPU.

    Copies ``data`` and ``data_centroid`` to the device, launches
    ``calc_distance_kernel`` and copies the result back to the host.

    Args:
        data: 2D array, one data point per row.
        data_centroid: 2D array, one centroid per row.

    Returns:
        numpy.ndarray: float64 array of shape
        ``(data.shape[0], data_centroid.shape[0])`` where element ``[i, j]``
        is the distance written by the kernel for row ``i`` and centroid ``j``.
    """
    # Size the launch from the arrays actually passed in, so the grid always
    # covers them even when they differ from the notebook-level arrays.
    thread_per_blocks, blocks_per_grid = calc_dimension_for_distance(data, data_centroid)

    data_device = cuda.to_device(data)
    centroid_device = cuda.to_device(data_centroid)
    # Allocate the output directly on the device: the kernel writes every
    # (row, centroid) element, so uploading a host buffer of zeros is wasted
    # transfer.
    result_device = cuda.device_array((data.shape[0], data_centroid.shape[0]), dtype=np.float64)

    # invoke kernel
    calc_distance_kernel[blocks_per_grid, thread_per_blocks](data_device, centroid_device, result_device)
    return result_device.copy_to_host()


In [75]:
def calc_distance_cpu(data, data_centroid):
    """Compute the point-to-centroid distance matrix on the CPU.

    Every (row, centroid) pair is evaluated one at a time with
    ``np.linalg.norm``.

    Args:
        data: 2D array, one data point per row.
        data_centroid: 2D array, one centroid per row.

    Returns:
        numpy.ndarray: Array of shape
        ``(data.shape[0], data_centroid.shape[0])`` whose element ``[i, j]``
        is the norm of ``data[i] - data_centroid[j]``.
    """
    n_points = data.shape[0]
    n_centroids = data_centroid.shape[0]
    dist = np.zeros((n_points, n_centroids))
    # ndindex walks the (row, centroid) pairs in row-major order.
    for row, col in np.ndindex(n_points, n_centroids):
        dist[row, col] = np.linalg.norm(data[row] - data_centroid[col])
    return dist

In [85]:
def get_nearest_centroid(distance):
    """Return, for each row, the column index holding the smallest value.

    Args:
        distance: 2D array-like indexed ``[row, centroid]``.

    Returns:
        numpy.ndarray: 1D integer array with one column index per row.
    """
    distance_matrix = np.asanyarray(distance)
    return distance_matrix.argmin(axis=1)

In [None]:
def get_new_centroids(data, data_nearest_centroid, number_of_centroid, previous_centroids=None):
    """Recompute each centroid as the mean of the rows assigned to it.

    Args:
        data: 2D array, one data point per row.
        data_nearest_centroid: 1D array-like of cluster indices, one per row
            of ``data``.
        number_of_centroid: Number of clusters; sets the number of output rows.
        previous_centroids: Optional 2D array with one row per cluster. When
            given, a cluster that received no rows keeps its row from this
            array instead of becoming NaN.

    Returns:
        numpy.ndarray: float array of shape
        ``(number_of_centroid, data.shape[1])``. Rows of clusters with no
        members are NaN unless ``previous_centroids`` was supplied.
    """
    labels = np.asarray(data_nearest_centroid)
    # Start from NaN so an empty cluster is explicit without ever taking the
    # mean of an empty slice (which emits a RuntimeWarning).
    result_centroids = np.full((number_of_centroid, data.shape[1]), np.nan)
    for i in range(number_of_centroid):
        members = data[labels == i]
        if members.shape[0] > 0:
            result_centroids[i] = members.mean(axis=0)
        elif previous_centroids is not None:
            result_centroids[i] = previous_centroids[i]
    return result_centroids

# K-means on the CPU


In [86]:
def kmean_cpu(data, initial_centroid):
    """Run k-means with the CPU distance routine until the centroids stop changing.

    Each iteration computes all point-to-centroid distances with
    ``calc_distance_cpu``, assigns every row to its nearest centroid and
    recomputes the centroids. The loop ends when an iteration reproduces the
    current centroids exactly. A one-line summary (cluster count, iteration
    count, elapsed seconds) is printed at the end.

    Args:
        data: 2D array, one data point per row.
        initial_centroid: 2D array-like of starting centroids, one per row.
            It is copied, not modified.

    Returns:
        numpy.ndarray: The final centroids, one per row.
    """
    # Work on a copy so the caller's starting centroids stay untouched.
    centroid_cpu = np.array(initial_centroid)
    # Take the cluster count from the centroids actually passed in rather
    # than from a notebook-level global that may not match them.
    n_clusters = centroid_cpu.shape[0]
    iteration = 0
    has_changed_centroid = True
    total_time_start = time.perf_counter()
    while has_changed_centroid:
        calculated_dist = calc_distance_cpu(data, centroid_cpu)
        nearest_centroid = get_nearest_centroid(calculated_dist)
        new_centroid = get_new_centroids(data, nearest_centroid, n_clusters)
        # A cluster with no members has no mean (it comes back as NaN). NaN
        # never compares equal to itself, so the loop could never terminate;
        # keep the previous position for such clusters instead.
        empty_clusters = np.bincount(nearest_centroid, minlength=n_clusters) == 0
        new_centroid[empty_clusters] = centroid_cpu[empty_clusters]
        if np.array_equal(new_centroid, centroid_cpu):
            has_changed_centroid = False
        else:
            centroid_cpu = new_centroid
        iteration += 1
    total_time_end = time.perf_counter()
    print(f"k: {n_clusters} | iteration took: {iteration} | total time: {total_time_end - total_time_start:0.4f}")
    return centroid_cpu

In [90]:
# Run the CPU k-means on the full data set, starting from the initial centroids.
centroid_cpu_result = kmean_cpu(np_data, centroid)

k: 20 | iteration took: 66 | total time: 92.4939


# K-means on the GPU


In [88]:
def kmean_gpu(data, initial_centroid):
    """Run k-means with the GPU distance routine until the centroids stop changing.

    Each iteration computes all point-to-centroid distances with
    ``calc_distance_gpu``, assigns every row to its nearest centroid and
    recomputes the centroids on the host. The loop ends when an iteration
    reproduces the current centroids exactly. A one-line summary (cluster
    count, iteration count, elapsed seconds) is printed at the end.

    Args:
        data: 2D array, one data point per row.
        initial_centroid: 2D array-like of starting centroids, one per row.
            It is copied, not modified.

    Returns:
        numpy.ndarray: The final centroids, one per row.
    """
    # Work on a copy so the caller's starting centroids stay untouched.
    centroid_gpu = np.array(initial_centroid)
    # Take the cluster count from the centroids actually passed in rather
    # than from a notebook-level global that may not match them.
    n_clusters = centroid_gpu.shape[0]
    iteration = 0
    has_changed_centroid = True
    total_time_start = time.perf_counter()
    while has_changed_centroid:
        calculated_dist = calc_distance_gpu(data, centroid_gpu)
        nearest_centroid = get_nearest_centroid(calculated_dist)
        new_centroid = get_new_centroids(data, nearest_centroid, n_clusters)
        # A cluster with no members has no mean (it comes back as NaN). NaN
        # never compares equal to itself, so the loop could never terminate;
        # keep the previous position for such clusters instead.
        empty_clusters = np.bincount(nearest_centroid, minlength=n_clusters) == 0
        new_centroid[empty_clusters] = centroid_gpu[empty_clusters]
        if np.array_equal(new_centroid, centroid_gpu):
            has_changed_centroid = False
        else:
            centroid_gpu = new_centroid
        iteration += 1
    total_time_end = time.perf_counter()
    print(f"k: {n_clusters} | iteration took: {iteration} | total time: {total_time_end - total_time_start:0.4f}")
    return centroid_gpu

In [89]:
# Run the GPU k-means on the full data set, starting from the same initial centroids.
centroid_gpu_result = kmean_gpu(np_data, centroid)

k: 20 | iteration took: 66 | total time: 0.8344


In [91]:
# Mean squared difference between the CPU and GPU centroids (0.0 means identical).
np.mean(np.square(centroid_cpu_result - centroid_gpu_result))

0.0

In [92]:
centroid_cpu_result

array([[5.48743561e+03, 1.00000000e+00, 1.07112444e+03, 7.52950000e+01,
        9.95829444e+02, 9.21412910e+02, 4.93686833e-01, 3.70369444e-02,
        4.70538722e-01, 6.94443889e-02, 2.05555556e+00, 2.79444444e+01,
        5.59166667e+03, 1.37885129e+03, 3.74127086e+04, 0.00000000e+00,
        1.19444444e+01],
       [5.21289310e+03, 9.94949496e-01, 4.72349654e+03, 3.02403268e+03,
        1.69946386e+03, 7.44652858e+02, 9.20603669e-01, 6.67979016e-01,
        7.00131236e-01, 1.02799622e-01, 2.42519685e+00, 6.23385827e+01,
        1.02929134e+04, 3.01750509e+03, 2.02147971e+03, 1.57480079e-02,
        1.18346457e+01],
       [5.62656430e+03, 9.49545450e-01, 2.91751320e+04, 2.36507215e+04,
        5.52441050e+03, 1.11562704e+03, 8.90833300e-01, 8.27500050e-01,
        6.64999950e-01, 4.58333000e-02, 2.05000000e+00, 1.23750000e+02,
        1.61250000e+04, 2.94495967e+04, 3.66384018e+03, 5.21780300e-01,
        1.19000000e+01],
       [7.90910969e+02, 8.86079062e-01, 1.11457721e+03, 6.185

In [93]:
centroid_gpu_result

array([[5.48743561e+03, 1.00000000e+00, 1.07112444e+03, 7.52950000e+01,
        9.95829444e+02, 9.21412910e+02, 4.93686833e-01, 3.70369444e-02,
        4.70538722e-01, 6.94443889e-02, 2.05555556e+00, 2.79444444e+01,
        5.59166667e+03, 1.37885129e+03, 3.74127086e+04, 0.00000000e+00,
        1.19444444e+01],
       [5.21289310e+03, 9.94949496e-01, 4.72349654e+03, 3.02403268e+03,
        1.69946386e+03, 7.44652858e+02, 9.20603669e-01, 6.67979016e-01,
        7.00131236e-01, 1.02799622e-01, 2.42519685e+00, 6.23385827e+01,
        1.02929134e+04, 3.01750509e+03, 2.02147971e+03, 1.57480079e-02,
        1.18346457e+01],
       [5.62656430e+03, 9.49545450e-01, 2.91751320e+04, 2.36507215e+04,
        5.52441050e+03, 1.11562704e+03, 8.90833300e-01, 8.27500050e-01,
        6.64999950e-01, 4.58333000e-02, 2.05000000e+00, 1.23750000e+02,
        1.61250000e+04, 2.94495967e+04, 3.66384018e+03, 5.21780300e-01,
        1.19000000e+01],
       [7.90910969e+02, 8.86079062e-01, 1.11457721e+03, 6.185