# In [11]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from multiprocessing import Pool
import random

# In [2]:
# Read the pixel table from disk; the CSV is expected to carry one column per
# colour channel named 'Red', 'Green' and 'Blue'.
df = pd.read_csv('datasetRGB.csv')

# Keep only the three channel columns, as a 2-D NumPy array with one row per
# CSV record and one column per channel.
image_array = df[['Red', 'Green', 'Blue']].to_numpy()

# In [None]:
import numpy as np
import random
import multiprocessing as mp

def assign_pixels_to_clusters(pixels, cluster_centers):
    """Return, for every pixel, the index of its nearest cluster center.

    Distances are Euclidean and are computed in float64.

    Args:
        pixels: Array-like of shape (n_pixels, n_channels).
        cluster_centers: Array-like of shape (k, n_channels). A list of
            per-center arrays is accepted as well.

    Returns:
        Integer array of shape (n_pixels,) holding, for each pixel, the
        position of the closest center in ``cluster_centers``. On a tie the
        lowest index wins (``np.argmin`` returns the first minimum).
    """
    # Cast before subtracting: with unsigned integer inputs (e.g. uint8 image
    # data) the difference would otherwise wrap around modulo 2**bits and
    # produce bogus distances.
    pixels = np.asarray(pixels, dtype=np.float64)
    centers = np.asarray(cluster_centers, dtype=np.float64)

    # Broadcast (n_pixels, 1, c) against (k, c) to get every pixel/center pair.
    distances = np.linalg.norm(pixels[:, np.newaxis] - centers, axis=2)
    return np.argmin(distances, axis=1)

def calculate_local_sums(args):
    """Sum the pixels of one chunk separately for each cluster.

    Takes a single tuple so it can be used directly with ``Pool.map``.

    Args:
        args: ``(local_pixels, assignments, k)`` where ``local_pixels`` is an
            array of 3-channel pixel rows, ``assignments`` holds the cluster
            index of each row, and ``k`` is the number of clusters.

    Returns:
        Float array of shape (k, 3); row ``i`` is the channel-wise sum of the
        pixels assigned to cluster ``i`` (all zeros if the chunk has none).
    """
    chunk, labels, k = args
    sums = np.zeros((k, 3))
    for cluster in range(k):
        members = chunk[labels == cluster]
        if len(members) == 0:
            # No pixel of this chunk belongs to the cluster; leave the zeros.
            continue
        sums[cluster] = members.sum(axis=0)
    return sums

def kmeans_parallel(image_pixels, k, iterations, num_processes,
                    image_shape=None, output_path='compressed.jpg'):
    """Cluster pixels with k-means, summing cluster members in worker processes.

    Each iteration assigns every pixel to its nearest center, splits the
    pixels into ``num_processes`` chunks, lets a process pool compute the
    per-cluster channel sums of each chunk, and averages the combined sums
    into new centers. The final centers are written to
    ``final_cluster_centers.txt`` (one center per line, channels separated by
    spaces) and every pixel is replaced by its nearest final center.

    Args:
        image_pixels: Array of shape (n_pixels, 3).
        k: Number of clusters.
        iterations: Number of assignment/update rounds.
        num_processes: Size of the worker pool and number of chunks.
        image_shape: Shape to give the compressed pixels. When omitted, a
            module-level ``image_shape`` is used if one exists (this is what
            the function originally relied on); otherwise the shape of
            ``image_pixels`` is kept.
        output_path: Where to write the compressed image. Only used when the
            reshaped result is 3-dimensional, i.e. has real image geometry.

    Returns:
        ``(cluster_centers, compressed_image)``: the float (k, 3) centers and
        the compressed pixels reshaped to ``image_shape`` in the input dtype.

    Raises:
        ValueError: If ``k`` exceeds the number of pixels (from
            ``random.sample``) or ``num_processes`` is less than 1 (from
            ``multiprocessing.Pool``).
    """
    image_pixels = np.asarray(image_pixels)
    if image_shape is None:
        # Backward compatibility with callers that only set the global.
        image_shape = globals().get('image_shape', image_pixels.shape)

    # Randomly select k distinct pixels as the initial cluster centers. They
    # are held as floats so the mean update below can be stored in place.
    random_indices = random.sample(range(len(image_pixels)), k)
    cluster_centers = image_pixels[random_indices].astype(np.float64)

    # The context manager guarantees the worker processes are released even
    # if an iteration raises.
    with mp.Pool(processes=num_processes) as pool:
        for _ in range(iterations):
            # Assign each pixel to the closest cluster
            assignments = assign_pixels_to_clusters(image_pixels, cluster_centers)

            # array_split always yields exactly num_processes chunks (some
            # possibly empty), so no trailing pixels are dropped when the
            # pixel count is not a multiple of the pool size.
            pixel_chunks = np.array_split(image_pixels, num_processes)
            label_chunks = np.array_split(assignments, num_processes)
            tasks = [(chunk, labels, k)
                     for chunk, labels in zip(pixel_chunks, label_chunks)]

            # Calculate local sums for each cluster in parallel
            local_sums = pool.map(calculate_local_sums, tasks)

            # Reduce local sums to find global cluster centers
            global_sums = np.sum(local_sums, axis=0)
            cluster_sizes = np.array([np.sum(assignments == i) for i in range(k)])

            # A cluster that attracted no pixels keeps its previous center
            # instead of collapsing to the origin.
            occupied = cluster_sizes > 0
            cluster_centers[occupied] = (
                global_sums[occupied] / cluster_sizes[occupied, np.newaxis]
            )

    # Save final cluster centers to a text file
    with open('final_cluster_centers.txt', 'w') as f:
        for center in cluster_centers:
            f.write(' '.join(map(str, center)) + '\n')

    # Replace every pixel by its nearest final center (vectorised); the cast
    # back to the input dtype truncates like assigning into a same-dtype array.
    final_assignments = assign_pixels_to_clusters(image_pixels, cluster_centers)
    compressed_pixels = cluster_centers[final_assignments].astype(image_pixels.dtype)

    # Reshape the compressed pixels to the original image shape
    compressed_image = compressed_pixels.reshape(image_shape)

    # A flat (n_pixels, 3) table has no width/height, so an image file is only
    # written when the target shape is a real (H, W, channels) image.
    if compressed_image.ndim == 3:
        import matplotlib.pyplot as plt

        # NOTE(review): assumes channel values are on a 0-255 scale - confirm.
        plt.imsave(output_path, np.clip(compressed_image, 0, 255).astype(np.uint8))

    return cluster_centers, compressed_image

if __name__ == '__main__':
    # Take the pixel data prepared earlier in the file and remember its
    # original shape before flattening it to one 3-channel row per pixel.
    image = image_array
    image_shape = image.shape
    image_pixels = np.reshape(image, (-1, 3))

    # Clustering configuration: cluster count, update rounds, worker count.
    k, iterations, num_processes = 5, 10, 4

    # Run the parallel k-means on the flattened pixels.
    kmeans_parallel(
        image_pixels,
        k=k,
        iterations=iterations,
        num_processes=num_processes,
    )
