In [None]:
!pip install pycuda

In [None]:
import math
from skimage import io, color
from skimage.transform import resize
import numpy as np
import time
import pstats
import cProfile
import pybind11
import pycuda.driver as cuda
import pycuda.autoinit
import numpy as np
from pycuda.compiler import SourceModule

In [None]:
# Factory helper: builds the SuperPixels object for one image location.
def make_superPixel(h, w, img, superpixel_id):
    """Create a ``SuperPixels`` instance for the pixel at ``img[h, w]``.

    The first three channel values of that pixel are passed as the l, a and b
    components, and ``superpixel_id`` is passed as the id.
    """
    pixel = img[h, w]
    return SuperPixels(h, w, pixel[0], pixel[1], pixel[2], superpixel_id)

In [None]:
# Defines the initial cluster centers, laid out on a regular grid with spacing S
def initial_cluster_center(S, img, img_h, img_w):
    """Build the initial list of superpixel centers.

    Centers are placed at rows ``S // 2, S // 2 + S, ...`` (below ``img_h``)
    and, within each row, at columns ``S // 2, S // 2 + S, ...`` (below
    ``img_w``). Each one is created with ``make_superPixel``.

    Args:
        S: grid spacing in pixels; must be a positive integer.
        img: image passed through to ``make_superPixel``.
        img_h: image height (number of rows).
        img_w: image width (number of columns).

    Returns:
        List of cluster objects in row-major grid order. Ids are assigned in
        that order as 1.0, 2.0, ...

    Raises:
        ValueError: if ``S`` is not positive (the grid would never advance).
    """
    if S <= 0:
        raise ValueError("S must be a positive grid spacing, got {!r}".format(S))

    clusters = []
    offset = S // 2
    superpixel_id = 0.0
    # The original loop read `h` and `w` before ever assigning them; iterating
    # over explicit ranges gives every row the same starting column.
    for h in range(offset, img_h, S):
        for w in range(offset, img_w, S):
            superpixel_id += 1
            clusters.append(make_superPixel(h, w, img, superpixel_id))
    return clusters

In [None]:
# function to calculate gradient at each pixel
def calc_gradient(h, w, img, img_w, img_h):
    """Return a diagonal forward-difference "gradient" at pixel (h, w).

    The value is the sum, over the first three channels, of
    ``img[h + 1, w + 1] - img[h, w]``. It is signed, not a magnitude.

    Args:
        h: row index of the pixel.
        w: column index of the pixel.
        img: image indexed as ``img[row, column]`` with at least 3 channels.
        img_w: image width (number of columns).
        img_h: image height (number of rows).

    Returns:
        The summed channel difference between the pixel and its lower-right
        diagonal neighbour.
    """
    # Clamp so that the diagonal neighbour (h + 1, w + 1) stays inside the image.
    if w + 1 >= img_w:
        w = img_w - 2
    if h + 1 >= img_h:
        h = img_h - 2
    # Index as [row, column], like every other access in this file; the
    # previous [w, h] order broke on non-square images.
    neighbour = img[h + 1, w + 1]
    current = img[h, w]
    grad = neighbour[0] - current[0] + neighbour[1] - current[1] + neighbour[2] - current[2]
    return grad

In [None]:
# function which reassigns each cluster center to the pixel having the lowest gradient
def reassign_cluster_center_acc_to_grad(clusters, img):
    """Move every cluster center to the lowest-gradient pixel of its 3x3 neighbourhood.

    For each cluster, the gradient (as returned by ``calc_gradient``) is
    evaluated at the center and at its eight neighbours. The center moves to
    the position with the smallest value, and its l, a, b are re-read from
    ``img`` there. The cluster id is preserved.

    Args:
        clusters: iterable of cluster objects exposing ``h``, ``w``, ``id``
            and ``update(h, w, l, a, b, id)``.
        img: image array indexed as ``img[row, column]``; its first two
            ``shape`` entries are used as height and width.
    """
    img_h, img_w = img.shape[0], img.shape[1]
    for c in clusters:
        # Scan around the ORIGINAL center; c.h / c.w must not move mid-scan.
        center_h, center_w = c.h, c.w
        best_h, best_w = center_h, center_w
        best_gradient = calc_gradient(center_h, center_w, img, img_w, img_h)
        for dh in range(-1, 2):
            for dw in range(-1, 2):
                H = center_h + dh
                W = center_w + dw
                # Skip neighbours that fall outside the image.
                if not (0 <= H < img_h and 0 <= W < img_w):
                    continue
                new_gradient = calc_gradient(H, W, img, img_w, img_h)
                if new_gradient < best_gradient:
                    best_h, best_w, best_gradient = H, W, new_gradient
        if (best_h, best_w) != (center_h, center_w):
            pixel = img[best_h, best_w]
            c.update(best_h, best_w, pixel[0], pixel[1], pixel[2], c.id)


In [None]:
# To-do : Major bottleneck, rewrite in CUDA
# Ensure that accesses are vectorized / parallel
def assign_pixels_to_cluster(clusters, S, img, img_h, img_w, tag, dis):
    """Assign pixels to their nearest cluster center.

    For every cluster, the pixels in the window ``[h - 2S, h + 2S) x
    [w - 2S, w + 2S)`` around its center (restricted to the image) are
    visited. The distance combines colour proximity (scaled by the
    module-level constant ``m``) and spatial proximity (scaled by ``S``).
    A pixel is (re)assigned when this distance is strictly smaller than the
    value stored in ``dis``.

    Args:
        clusters: cluster objects with ``h``, ``w``, ``l``, ``a``, ``b`` and a
            ``pixels`` list.
        S: grid spacing used for the window size and spatial normalisation.
        img: image indexed as ``img[row, column]`` yielding three values.
        img_h: image height.
        img_w: image width.
        tag: dict mapping ``(row, column)`` to the owning cluster; updated in place.
        dis: 2-D array of best distances so far; updated in place.
    """
    reach = 2 * S
    for center in clusters:
        first_row = max(center.h - reach, 0)
        last_row = min(center.h + reach, img_h)
        for row in range(first_row, last_row):
            first_col = max(center.w - reach, 0)
            last_col = min(center.w + reach, img_w)
            for col in range(first_col, last_col):
                l, a, b = img[row, col]
                color_dist = math.sqrt(
                    math.pow(l - center.l, 2)
                    + math.pow(a - center.a, 2)
                    + math.pow(b - center.b, 2)
                )
                spatial_dist = math.sqrt(
                    math.pow(row - center.h, 2) + math.pow(col - center.w, 2)
                )
                D = math.sqrt(math.pow(color_dist / m, 2) + math.pow(spatial_dist / S, 2))
                if not (D < dis[row, col]):
                    continue
                key = (row, col)
                # Detach the pixel from its previous owner, if it had one.
                if key in tag:
                    tag[key].pixels.remove(key)
                tag[key] = center
                center.pixels.append(key)
                dis[row, col] = D

In [None]:
# function to replace the cluster center with the mean of the pixels contained in the cluster
def update_cluster_mean(clusters):
    """Move each cluster center to the mean position of its member pixels.

    The new center is the integer (floor) mean of the row and column indices
    in ``c.pixels``. Its l, a, b values are re-read from the module-level
    ``img`` at that position. The cluster id is preserved. Clusters without
    pixels are left untouched.

    Args:
        clusters: iterable of cluster objects exposing ``pixels`` (a list of
            ``(row, column)`` tuples), ``id`` and ``update(h, w, l, a, b, id)``.
    """
    for c in clusters:
        number = len(c.pixels)
        if number == 0:
            # Nothing assigned to this cluster: keep its current center.
            continue
        sum_h = sum(p[0] for p in c.pixels)
        sum_w = sum(p[1] for p in c.pixels)
        H = sum_h // number
        W = sum_w // number
        # One update per cluster (previously recomputed for every pixel).
        pixel = img[H, W]
        c.update(H, W, pixel[0], pixel[1], pixel[2], c.id)

In [None]:
# Paint every pixel of a cluster with the colour stored on the cluster's center
def avg_color_cluster(img, name, clusters):
    """Recolour a copy of ``img`` cluster by cluster and save it via ``lab2rgb``.

    Args:
        img: source image; it is copied, never modified.
        name: output path handed to ``lab2rgb``.
        clusters: cluster objects with ``l``, ``a``, ``b`` and a ``pixels``
            list of ``(row, column)`` tuples.
    """
    recolored = np.copy(img)
    for center in clusters:
        lab = (center.l, center.a, center.b)
        for row, col in center.pixels:
            for channel, value in enumerate(lab):
                recolored[row, col][channel] = value
    lab2rgb(name, recolored)

In [None]:
# function for the Simple Linear Iterative Clustering
def slic(S, img, img_h, img_w, clusters=None, tag=None, dis=None, num_iterations=10):
    """Run SLIC on ``img`` and save the recoloured result.

    Steps: build the initial center grid, nudge the centers to low-gradient
    pixels, then alternate pixel assignment and center update for
    ``num_iterations`` rounds. Afterwards the recoloured image is written to
    ``out_m{m}_k{k}.png``, where ``m`` and ``k`` are module-level constants.

    Args:
        S: grid spacing / expected superpixel size.
        img: LAB image indexed as ``img[row, column]``.
        img_h: image height.
        img_w: image width.
        clusters: ignored; kept only so existing positional calls keep
            working (the cluster list is always rebuilt here).
        tag: dict mapping ``(row, column)`` to its cluster, filled in place.
            A fresh dict is created when omitted.
        dis: 2-D array of best distances, updated in place. A fresh
            ``img_h x img_w`` array of ``inf`` is created when omitted.
        num_iterations: number of assignment/update rounds (default 10).

    Returns:
        The list of cluster objects after the last round.
    """
    # Fresh state by default, so repeated runs do not inherit old assignments.
    if tag is None:
        tag = {}
    if dis is None:
        dis = np.full((img_h, img_w), np.inf)

    clusters = initial_cluster_center(S, img, img_h, img_w)
    reassign_cluster_center_acc_to_grad(clusters, img)

    for _ in range(num_iterations):
        assign_pixels_to_cluster(clusters, S, img, img_h, img_w, tag, dis)
        update_cluster_mean(clusters)

    # Save once, after the final round.
    name = 'out_m{m}_k{k}.png'.format(m=m, k=k)
    avg_color_cluster(img, name, clusters)
    return clusters

In [None]:
# Converts a LAB image back to RGB and writes it to disk
def lab2rgb(path, lab_arr):
    """Convert ``lab_arr`` to 8-bit RGB and save it at ``path``.

    The result of ``color.lab2rgb`` is multiplied by 255 and cast to
    ``uint8`` (the cast truncates the fractional part) before being passed
    to ``io.imsave``.
    """
    io.imsave(path, (color.lab2rgb(lab_arr) * 255).astype(np.uint8))

In [None]:
# A class to hold one superpixel (cluster center), of the form - [h,w,l,a,b] plus an id.
class SuperPixels(object):
    """One SLIC cluster center and the pixels currently assigned to it.

    Attributes are plain instance fields: ``h``/``w`` (center row/column),
    ``l``/``a``/``b`` (center colour), ``id`` (cluster identifier) and
    ``pixels`` (list of ``(row, column)`` tuples appended by the assignment
    step).
    """

    def __init__(self, h, w, l=0, a=0, b=0, id=0):
        """Create a center at (h, w) with colour (l, a, b), id ``id`` and no pixels."""
        self.id = id
        self.update(h, w, l, a, b)
        self.pixels = []

    def update(self, h, w, l, a, b, id=None):
        """Move the center and set its colour.

        Args:
            h, w: new center row and column.
            l, a, b: new center colour components.
            id: new identifier. When omitted (``None``) the current id is
                kept, so a center can be moved with only position and colour.
        """
        self.h = h
        self.w = w
        self.l = l
        self.a = a
        self.b = b
        if id is not None:
            self.id = id

In [None]:
# Contains all kernels to make SLIC faster
cuda_kernel="""
#include <cfloat>

// Define a struct for all cluster centers
struct clusterCenter{
  float l;
  float a;
  float b;
  float x;
  float y;
  float id;
};

// Number of float fields in clusterCenter (l, a, b, x, y, id)
#define CLUSTER_FIELDS 6

// Number of assignment passes; the SLIC algorithm is run for 10 iterations
#define NUM_ITERATIONS 10

// Block-local copy of all cluster centers. It is dynamically sized: the launch
// must request numSuperpixels * CLUSTER_FIELDS * sizeof(float) bytes of shared memory.
extern __shared__ float clusterInfo[];

// This kernel calculates the distance between a pixel and cluster centers and
// assigns them an ID based on the nearest cluster.
// One thread handles one pixel; global_tid is the pixel's index into L, A, B and labelMap.
__global__ void assignClusterCenters(float* L, float* A, float* B, clusterCenter* clusterPtr,
                                    int* labelMap, int imageWidth, int imageHeight,
                                    int numSuperpixels, int M, int S){

  int tid = threadIdx.x;
  int global_tid = tid + blockIdx.x * blockDim.x;
  int numPixels = imageWidth * imageHeight;

  // Load the cluster centers into shared memory once, before the iteration loop.
  // Every thread of the block takes part and reaches the barrier unconditionally.
  int numValues = numSuperpixels * CLUSTER_FIELDS;
  const float* clusterValues = reinterpret_cast<const float*>(clusterPtr);
  for (int idx = tid; idx < numValues; idx += blockDim.x) {
    clusterInfo[idx] = clusterValues[idx];
  }
  __syncthreads();

  // Threads past the end of the image own no pixel. They stay alive (instead of
  // returning) so that barriers added inside the loop later remain safe.
  bool hasPixel = global_tid < numPixels;

  // Pixel position, assuming the channel arrays are row-major flattened:
  // X is the column, Y is the row.
  int X = global_tid % imageWidth;
  int Y = global_tid / imageWidth;

  // Pixel colour, read with the GLOBAL index (the block-local tid is only
  // correct for block 0).
  float pL = hasPixel ? L[global_tid] : 0.0f;
  float pA = hasPixel ? A[global_tid] : 0.0f;
  float pB = hasPixel ? B[global_tid] : 0.0f;

  for (int iter = 0; iter < NUM_ITERATIONS; ++iter) {

    if (hasPixel) {
      int minClusterIdx = 0;
      float minDist = FLT_MAX;

      // computes distance to all cluster centers and assign pixel to the nearest one
      for (int c = 0; c < numSuperpixels; ++c) {
        float cL = clusterInfo[c * CLUSTER_FIELDS + 0];
        float cA = clusterInfo[c * CLUSTER_FIELDS + 1];
        float cB = clusterInfo[c * CLUSTER_FIELDS + 2];
        float cX = clusterInfo[c * CLUSTER_FIELDS + 3];
        float cY = clusterInfo[c * CLUSTER_FIELDS + 4];
        int cID = static_cast<int>(clusterInfo[c * CLUSTER_FIELDS + 5]);

        // Euclidean distance
        float eucDist = sqrtf((cX - X) * (cX - X) + (cY - Y) * (cY - Y));

        // LAB color distance
        float labDist = sqrtf((cL - pL) * (cL - pL) +
                              (cA - pA) * (cA - pA) +
                              (cB - pB) * (cB - pB));

        // Total distance
        float totalDist = sqrtf((labDist / M) * (labDist / M) + (eucDist / S) * (eucDist / S));

        // Gets the nearest cluster's ID
        if (totalDist < minDist) {
          minClusterIdx = cID;
          minDist = totalDist;
        }
      }

      // Stores the nearest cluster's ID at this pixel's position in labelMap
      labelMap[global_tid] = minClusterIdx;
    }

    // TODO: Update the cluster center vals. Until that step exists, every
    // iteration recomputes the same assignment.
  }
}

"""



In [None]:
# Reads the input RGB image
IMAGE_PATH = "/content/joel-filipe-QwoNAhbmLLo-unsplash.jpg"
rgb = io.imread(IMAGE_PATH)
print(rgb.shape)

# Resize images to (400 x 400) for processing
img = resize(rgb, (400, 400), anti_aliasing=True)
print(img.shape)

# convert RGB to LAB
img = color.rgb2lab(img)

# Flattened per-channel planes for the GPU path. The kernel declares them as
# float*, so they are stored as contiguous float32 arrays.
L, A, B = (np.ascontiguousarray(img[:, :, ch].ravel(), dtype=np.float32) for ch in range(3))

k = 150   # Number of Super pixels
m = 20    # Constant for normalizing the color proximity, range of m = [1,40]

img_h = img.shape[0] # Image Height
img_w = img.shape[1] # Image Width

N = img_h * img_w  # Total number of pixels in the image
S = int(math.sqrt(N / k)) # average size of each superpixel

# clusters = []
# tag = {}
# # initialize the distance between pixels and cluster center as infinity
# dis = np.full((img_h, img_w), np.inf)

(4610, 3648, 3)
(400, 400, 3)


In [None]:
# Gets the initial cluster centers (the function takes exactly these four arguments)
clusters = initial_cluster_center(S, img, img_h, img_w)

# Defines a np dtype that contains all important info abt the cluster centers
struct_dtype = np.dtype([
  ("l", np.float32),
  ("a", np.float32),
  ("b", np.float32),
  ("x", np.float32),
  ("y", np.float32),
  ("id", np.float32),
])

# The kernel compares x with the pixel's column and y with its row,
# so x takes the center's column (w) and y takes its row (h).
cluster_array = np.array(
    [(cluster.l, cluster.a, cluster.b, cluster.w, cluster.h, cluster.id) for cluster in clusters],
    dtype=struct_dtype,
)

In [None]:
# Compile the kernels. No architecture is forced, so PyCUDA targets the GPU of
# the active context instead of a hard-coded sm_75.
mod = SourceModule(cuda_kernel)

assign_clusters_fn = mod.get_function("assignClusterCenters")

# cluster_array is a np array that contains LABXY, ID for each superpixel center
num_superpixels = len(cluster_array)
cluster_info_size = cluster_array.nbytes

# Host buffers in exactly the element types the kernel signature declares:
# float* for the colour planes, int* for the label map.
h_L = np.ascontiguousarray(L, dtype=np.float32)
h_A = np.ascontiguousarray(A, dtype=np.float32)
h_B = np.ascontiguousarray(B, dtype=np.float32)
label_map = np.zeros(N, dtype=np.int32)

# size of L, A, B arrays and of the label map in bytes
size_LAB = h_L.nbytes
size_label_map = label_map.nbytes

d_L = cuda.mem_alloc(size_LAB)
d_A = cuda.mem_alloc(size_LAB)
d_B = cuda.mem_alloc(size_LAB)
d_labelMap = cuda.mem_alloc(size_label_map)
d_clusters = cuda.mem_alloc(cluster_info_size)

# Copies Data to Device
cuda.memcpy_htod(d_L, h_L)
cuda.memcpy_htod(d_A, h_A)
cuda.memcpy_htod(d_B, h_B)
cuda.memcpy_htod(d_labelMap, label_map)
cuda.memcpy_htod(d_clusters, cluster_array)

# CUDA Grid Configuration: one thread per pixel
threadsPerBlock = 256
numBlocks = (N + threadsPerBlock - 1) // threadsPerBlock

# Shared memory holds one copy of all cluster centers
sizeSmem = cluster_info_size

# Kernel Launch. Arguments follow the kernel signature one-to-one; scalar
# arguments are passed as sized numpy integers.
assign_clusters_fn(d_L, d_A, d_B, d_clusters, d_labelMap,
                   np.int32(img_w), np.int32(img_h), np.int32(num_superpixels),
                   np.int32(m), np.int32(S),
                   block=(threadsPerBlock, 1, 1), grid=(numBlocks, 1),
                   shared=sizeSmem)

# Copies the per-pixel cluster ids back to the host
cuda.memcpy_dtoh(label_map, d_labelMap)


In [None]:
# Fresh state for this run: pixel -> cluster map and best distance per pixel
tag = {}
dis = np.full((img_h, img_w), np.inf)

# perf_counter is a monotonic clock intended for measuring intervals
start_time = time.perf_counter()
# slic() rebuilds its own cluster list, so the clusters argument is only a placeholder
cluster = slic(S, img, img_h, img_w, [], tag, dis)
end_time = time.perf_counter()

print(f"Time taken is {(end_time - start_time) * 1000} ms")

Time taken is 131309.42249298096 ms


In [None]:
# Reset the assignment state so the profiled run starts from scratch rather
# than from the distances and tags left behind by an earlier run.
tag = {}
dis = np.full((img_h, img_w), np.inf)
cProfile.run('slic(S, img, img_h, img_w, [], tag, dis)', 'profile_results')

In [None]:
# Load the saved profile, order it by internal time and show the top 15 entries
stats = pstats.Stats('profile_results')
stats.sort_stats(pstats.SortKey.TIME)
stats.print_stats(15)

Wed Feb  5 07:56:45 2025    profile_results

         830874310 function calls (830874307 primitive calls) in 578.676 seconds

   Ordered by: internal time
   List reduced from 299 to 15 due to restriction <15>

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
       10  446.984   44.698  574.103   57.410 <ipython-input-6-8cd683e35477>:5(assign_pixels_to_cluster)
580425832  100.727    0.000  100.727    0.000 {built-in method math.pow}
248753928   25.630    0.000   25.630    0.000 {built-in method math.sqrt}
       10    3.339    0.334    4.023    0.402 <ipython-input-7-2dcaf0d4e4fe>:2(update_cluster_mean)
    43385    0.757    0.000    0.757    0.000 {method 'remove' of 'list' objects}
  1601879    0.685    0.000    0.685    0.000 <ipython-input-11-cc6432300d62>:8(update)
        1    0.430    0.430    0.527    0.527 <ipython-input-8-d4992ef63ce6>:2(avg_color_cluster)
        1    0.029    0.029    0.038    0.038 /usr/local/lib/python3.11/dist-packages/skimage/co

<pstats.Stats at 0x79c51fd16b50>

In [None]:
# superpixels: print the center position and LAB colour of every cluster
row_template = "H {} : W {}, l {}, a {}, b {}"
for c in cluster:
    print(row_template.format(c.h, c.w, c.l, c.a, c.b))

H 19 : W 20, l 24.089975801150572, a 6.983582114595571, b -13.266387089229582
H 16 : W 65, l 16.40778156165713, a 8.0141126927899, b -13.93056274157145
H 16 : W 112, l 54.79510797636068, a 2.960649997742648, b -7.700683087250404
H 25 : W 150, l 10.238819499505958, a 9.567046758129457, b -7.46135584808576
H 22 : W 185, l 3.403919588186259, a 4.9411272689862535, b -2.265624862039256
H 19 : W 219, l 12.324725907118658, a 9.593432142641815, b -0.5983476330680915
H 18 : W 257, l 8.092761208054739, a 10.681077429955735, b -9.075347388015853
H 19 : W 299, l 4.921299588520306, a 8.875064284386628, b -8.29229822245907
H 14 : W 339, l 6.623024157664659, a 10.432628886679742, b -8.98276346783824
H 19 : W 378, l 3.6456278365098953, a 5.468407041481507, b -4.6360393064611145
H 60 : W 19, l 5.104836088688547, a 4.198537100423519, b -8.945196661033378
H 54 : W 66, l 4.979812284699815, a 4.975935550010885, b -7.376025677730841
H 56 : W 106, l 21.377268316316183, a 6.909486083425426, b -13.645276743480