In [339]:
import math

import pandas as pd
import numpy as np
from numba import cuda
import time

# Read the credit-card customer table from disk.
df = pd.read_csv('data/CC GENERAL.csv')

# Preview the first rows, then report how many values are missing per column.
print(df.head(), "======================", sep="\n")
print(df.isna().sum(), "======================", sep="\n")

# Impute the two columns handled here: median for MINIMUM_PAYMENTS,
# mean for CREDIT_LIMIT. Then re-check the missing-value counts.
df = df.fillna({
    'MINIMUM_PAYMENTS': df['MINIMUM_PAYMENTS'].median(),
    'CREDIT_LIMIT': df['CREDIT_LIMIT'].mean(),
})
print(df.isna().sum(), "======================", sep="\n")

# Remove the CUST_ID column and convert the remaining columns to a NumPy array.
df = df.drop(columns='CUST_ID')
np_data = df.to_numpy()
print(np_data.shape, "======================", sep="\n")

  CUST_ID      BALANCE  BALANCE_FREQUENCY  PURCHASES  ONEOFF_PURCHASES  \
0  C10001    40.900749           0.818182      95.40              0.00   
1  C10002  3202.467416           0.909091       0.00              0.00   
2  C10003  2495.148862           1.000000     773.17            773.17   
3  C10004  1666.670542           0.636364    1499.00           1499.00   
4  C10005   817.714335           1.000000      16.00             16.00   

   INSTALLMENTS_PURCHASES  CASH_ADVANCE  PURCHASES_FREQUENCY  \
0                    95.4      0.000000             0.166667   
1                     0.0   6442.945483             0.000000   
2                     0.0      0.000000             1.000000   
3                     0.0    205.788017             0.083333   
4                     0.0      0.000000             0.083333   

   ONEOFF_PURCHASES_FREQUENCY  PURCHASES_INSTALLMENTS_FREQUENCY  \
0                    0.000000                          0.083333   
1                    0.000000       

In [340]:
# Number of centroids to start from.
k = 20

# Seeded generator so that "Restart Kernel -> Run All" picks the same rows every time.
rng = np.random.default_rng(42)

# Draw k *distinct* row indices. Sampling with replacement (np.random.randint)
# can pick the same row twice, which yields duplicate starting centroids.
# Raises ValueError if k exceeds the number of rows.
centroid = np_data[rng.choice(np_data.shape[0], size=k, replace=False), :]

In [341]:
def next_power_of_2(x):
    """Return the smallest power of two that is greater than or equal to ``x``.

    Args:
        x: Integer to round up.

    Returns:
        ``x`` itself when it is already a power of two, otherwise the next
        larger power of two. Any ``x <= 1`` yields 1.
    """
    # Without this guard, (x - 1) is zero or negative; for x <= 0 the
    # bit_length of a negative number made the result 2 or larger.
    if x <= 1:
        return 1
    return 1 << (x - 1).bit_length()

In [342]:
def calc_dimension_for_distance(data, data_centroid, max_threads_per_block=1024):
    """Choose a 2-D CUDA launch configuration for the distance kernel.

    The x axis of the grid indexes centroids and the y axis indexes data rows,
    matching how ``calc_distance_kernel`` derives its column and row.

    Args:
        data: Array whose first dimension is the number of samples.
        data_centroid: Array whose first dimension is the number of centroids.
        max_threads_per_block: Upper bound on ``block_x * block_y``. Defaults
            to 1024, the per-block thread limit of current CUDA devices.

    Returns:
        ``(threads_per_block, blocks_per_grid)``, each an ``(x, y)`` tuple.
        For up to 32 centroids the block is square, as before; for more
        centroids the block is shrunk along y and, if needed, the grid is
        widened along x so the block never exceeds the thread limit.
    """
    centroid_count = data_centroid.shape[0]
    row_count = data.shape[0]

    # Block width: centroid count rounded up to a power of two, capped at the limit.
    dim_x = min(next_power_of_2(centroid_count), max_threads_per_block)
    # Block height: square when that fits, otherwise whatever the limit leaves over.
    dim_y = min(dim_x, max(1, max_threads_per_block // dim_x))
    thread_per_blocks = (dim_x, dim_y)

    # Enough blocks to cover every centroid (x) and every row (y); at least one
    # block per axis because a zero-sized grid is not a valid launch.
    blocks_per_grid_x = max(1, math.ceil(centroid_count / dim_x))
    blocks_per_grid_y = max(1, math.ceil(row_count / dim_y))
    blocks_per_grid = (blocks_per_grid_x, blocks_per_grid_y)
    return thread_per_blocks, blocks_per_grid

In [343]:
# Derive the CUDA launch configuration (threads per block, blocks per grid)
# for the distance kernel from the dataset and the centroid array.
dist_tpb, dist_bpg = calc_dimension_for_distance(np_data, centroid)

In [344]:
# Show the threads-per-block tuple returned by calc_dimension_for_distance.
dist_tpb

(32, 32)

In [345]:
# Show the blocks-per-grid tuple returned by calc_dimension_for_distance.
dist_bpg

(1, 280)

In [346]:
@cuda.jit
def calc_distance_kernel(data, data_centroid, result):
    """CUDA kernel: Euclidean distance between one data row and one centroid.

    Each thread handles a single (row, centroid) pair: the grid's y coordinate
    selects the row of ``data`` and the x coordinate selects the row of
    ``data_centroid``. The distance is written to ``result[row, centroid]``.
    Threads whose coordinates fall outside either array do nothing.
    """
    c, r = cuda.grid(2)  # (x, y) absolute thread position

    # Guard: the launch grid may be larger than the arrays.
    if r >= data.shape[0] or c >= data_centroid.shape[0]:
        return

    feature_count = data_centroid.shape[1]
    squared_sum = 0
    for i in range(feature_count):
        squared_sum += math.pow(data[r, i] - data_centroid[c, i], 2)
    result[r, c] = math.sqrt(squared_sum)

In [347]:
# Module-level timestamps that calc_distance_gpu overwrites; both start at 0.
dist_gpu_time_start = dist_gpu_time_end = 0

In [348]:
def calc_distance_gpu(data, data_centroid):
    """Compute the Euclidean distance from every data row to every centroid on the GPU.

    Args:
        data: 2-D NumPy array of samples, one row per sample.
        data_centroid: 2-D NumPy array of centroids, one row per centroid.

    Returns:
        NumPy array of shape ``(data.shape[0], data_centroid.shape[0])`` whose
        element ``[i, j]`` is the distance between sample ``i`` and centroid ``j``.

    Side effects:
        Stores ``time.perf_counter()`` readings in the module-level
        ``dist_gpu_time_start`` (just before the kernel launch) and
        ``dist_gpu_time_end`` (after the device has finished the kernel).
        Host/device transfers are outside the timed region. The first call
        also includes Numba's JIT compilation of the kernel.
    """
    global dist_gpu_time_end
    global dist_gpu_time_start

    result = np.zeros((data.shape[0], data_centroid.shape[0]))
    if result.size == 0:
        # Nothing to compute; avoid launching a kernel over an empty grid.
        dist_gpu_time_start = dist_gpu_time_end = time.perf_counter()
        return result

    # Size the launch from the actual arguments rather than from notebook
    # globals computed for some other dataset, so every row is covered.
    threads_per_block, blocks_per_grid = calc_dimension_for_distance(data, data_centroid)

    data_device = cuda.to_device(data)
    # Upload the centroids that were passed in, not the notebook-level ``centroid``.
    centroid_device = cuda.to_device(data_centroid)
    result_device = cuda.to_device(result)

    # invoke kernel
    dist_gpu_time_start = time.perf_counter()
    calc_distance_kernel[blocks_per_grid, threads_per_block](
        data_device, centroid_device, result_device
    )
    # Kernel launches are asynchronous; wait for completion so the timer
    # measures the kernel itself and not just the launch call.
    cuda.synchronize()
    dist_gpu_time_end = time.perf_counter()

    return result_device.copy_to_host()


In [349]:
def calc_distance_cpu(data, data_centroid):
    """Compute the Euclidean distance from every data row to every centroid on the CPU.

    Reference implementation using plain Python loops and ``np.linalg.norm``.

    Args:
        data: 2-D NumPy array of samples, one row per sample.
        data_centroid: 2-D NumPy array of centroids, one row per centroid.

    Returns:
        NumPy array of shape ``(data.shape[0], data_centroid.shape[0])`` whose
        element ``[i, j]`` is the distance between sample ``i`` and centroid ``j``.
    """
    dist = np.zeros((data.shape[0], data_centroid.shape[0]))
    for i in range(data.shape[0]):
        for j in range(data_centroid.shape[0]):
            # Use the centroids passed in, not the notebook-level ``centroid``.
            dist[i, j] = np.linalg.norm(data[i] - data_centroid[j])
    return dist

In [350]:
# GPU run: calc_distance_gpu records its own timestamps in the module-level
# dist_gpu_time_start / dist_gpu_time_end.
calculated_dist_gpu =  calc_distance_gpu(np_data, centroid)
# CPU run: timed here, around the whole calc_distance_cpu call.
dist_cpu_time_start =time.perf_counter()
calculated_dist_cpu =  calc_distance_cpu(np_data, centroid)
dist_cpu_time_end =time.perf_counter()

In [351]:
# Display the distance matrix returned by calc_distance_gpu.
calculated_dist_gpu

array([[ 510.38193817, 2269.22264222, 3007.87986763, ..., 3847.54350287,
        2685.06168171, 8067.85916792],
       [9930.99336955, 8666.61549063, 8679.32795526, ..., 7702.30784737,
        9055.22038425, 8410.41724789],
       [6620.70628601, 4845.61413332, 4334.59719793, ..., 3884.02625104,
        5637.30464265, 4740.51791811],
       ...,
       [ 522.78161114, 2304.09767121, 3016.03415218, ..., 3854.00933843,
        2724.10904162, 8071.81613053],
       [1008.37255051, 2769.32607502, 3515.47062355, ..., 4300.59923903,
        3029.99841285, 8586.85672113],
       [1593.52277655, 2431.8748401 , 3136.91411571, ..., 3136.09659326,
        1861.04693005, 7666.58017801]])

In [352]:
# Shape of the GPU result: (number of data rows, number of centroids).
calculated_dist_gpu.shape

(8950, 20)

In [353]:
# Seconds between the two time.perf_counter() readings that
# calc_distance_gpu takes around its kernel invocation.
dist_gpu_time_end - dist_gpu_time_start

0.3126320819999364

In [354]:
# Display the distance matrix returned by calc_distance_cpu.
calculated_dist_cpu

array([[ 510.38193817, 2269.22264222, 3007.87986763, ..., 3847.54350287,
        2685.06168171, 8067.85916792],
       [9930.99336955, 8666.61549063, 8679.32795526, ..., 7702.30784737,
        9055.22038425, 8410.41724789],
       [6620.70628601, 4845.61413332, 4334.59719793, ..., 3884.02625104,
        5637.30464265, 4740.51791811],
       ...,
       [ 522.78161114, 2304.09767121, 3016.03415218, ..., 3854.00933843,
        2724.10904162, 8071.81613053],
       [1008.37255051, 2769.32607502, 3515.47062355, ..., 4300.59923903,
        3029.99841285, 8586.85672113],
       [1593.52277655, 2431.8748401 , 3136.91411571, ..., 3136.09659326,
        1861.04693005, 7666.58017801]])

In [355]:
# Shape of the CPU result: (number of data rows, number of centroids).
calculated_dist_cpu.shape

(8950, 20)

In [356]:
# Seconds between the two time.perf_counter() readings taken around the
# calc_distance_cpu call.
dist_cpu_time_end - dist_cpu_time_start

2.0101311089999854