## imports

In [None]:
import pickle
import numpy as np
from sklearn.cluster import KMeans
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.cluster import DBSCAN
import cv2
import random

# Part 1

## load dataset

In [None]:
def load_dataset(image_pickle_file_path='images.pkl', label_pickle_file_path='label.pkl'):
    """Load the pickled images and labels from disk.

    Args:
        image_pickle_file_path: Path of the pickle file holding the images.
            Defaults to ``'images.pkl'`` (relative to the working directory).
        label_pickle_file_path: Path of the pickle file holding the labels.
            Defaults to ``'label.pkl'`` (relative to the working directory).

    Returns:
        An ``(images, labels)`` tuple containing whatever objects the two
        pickle files store.

    Raises:
        FileNotFoundError: If either file does not exist.
    """
    # SECURITY: unpickling can execute arbitrary code, so only load files that
    # come from a trusted source.
    with open(image_pickle_file_path, 'rb') as file:
        images = pickle.load(file)

    with open(label_pickle_file_path, 'rb') as file:
        labels = pickle.load(file)

    return images, labels

In [None]:
images, labels = load_dataset()
# Draw a random sample of distinct images so the per-pixel processing below
# stays cheap. The population size is taken from the data rather than a
# hard-coded 560, so a smaller dataset is not indexed past its end and a larger
# one is not silently truncated; the sample size is clamped for the same reason.
# NOTE(review): the fancy indexing below assumes `images` is a NumPy array.
num_images = len(images)
random_indices = np.random.choice(num_images, size=min(20, num_images), replace=False)
# Extract the randomly selected values
random_values = images[random_indices]


## Process images
- Extract features
- normalize features
- flatten the features into a one-dimensional vector

In [None]:
def extract_features(images, weights=(10, 1, 1, 0, 0)):
    """Build per-pixel ``(h, s, v, x, y)`` features for a batch of images.

    Each image is converted with ``cv2.COLOR_BGR2HSV``; every pixel then
    contributes one row made of its three converted channel values followed by
    its row index (``x``) and column index (``y``).

    Args:
        images: Iterable of images accepted by
            ``cv2.cvtColor(img, cv2.COLOR_BGR2HSV)``.
        weights: Five multipliers applied to ``(h, s, v, x, y)`` respectively.
            The default reproduces the previously hard-coded weighting, which
            emphasises hue and zeroes out the position.

    Returns:
        A ``(features, original_features)`` tuple of lists of 5-tuples, one
        tuple per pixel in image-then-row-major order. ``features`` holds the
        weighted values, ``original_features`` the unweighted ones.
    """
    features = []
    original_features = []
    weight_row = np.asarray(weights)

    for img in images:
        img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        height, width = img_hsv.shape[:2]

        # Row/column index of every pixel, laid out in the same row-major
        # order as a nested "for row / for column" loop.
        rows, cols = np.indices((height, width))

        # Widen to int64 *before* weighting: multiplying 8-bit channel values
        # by a weight such as 10 can exceed the uint8 range and wrap around.
        # (assumes 8-bit input images - TODO confirm)
        pixel_table = np.column_stack([
            img_hsv.reshape(-1, 3).astype(np.int64),
            rows.ravel(),
            cols.ravel(),
        ])

        original_features.extend(map(tuple, pixel_table.tolist()))
        features.extend(map(tuple, (pixel_table * weight_row).tolist()))

    return features, original_features

In [None]:
def normalize_features(features):
    """Standardise every feature column with ``StandardScaler``.

    Args:
        features: Sequence of equally long feature rows.

    Returns:
        The scaled rows as a nested Python list.
    """
    scaled = StandardScaler().fit_transform(np.array(features))
    return scaled.tolist()

In [None]:
def process_images_in_chunks(images, chunk_size):
    """Run ``extract_features`` over ``images`` one slice at a time.

    Args:
        images: Sliceable collection of images.
        chunk_size: Number of images handed to ``extract_features`` per call.

    Returns:
        A ``(features, original_features)`` tuple with the per-chunk results
        joined in order.
    """
    weighted_rows, raw_rows = [], []

    for start in range(0, len(images), chunk_size):
        batch = images[start:start + chunk_size]
        batch_weighted, batch_raw = extract_features(batch)

        # Accumulate this chunk's rows onto the running totals.
        weighted_rows += batch_weighted
        raw_rows += batch_raw

        # Progress marker: offset of the chunk that was just processed.
        print('<<', start, '>>')

    return weighted_rows, raw_rows


In [None]:
# Extract per-pixel features, handing 10 images at a time to the extractor.
chunk_size = 10
# To process the whole dataset instead of the random sample, use:
# features, original_features = process_images_in_chunks(images, chunk_size)
features, original_features = process_images_in_chunks(random_values, chunk_size)

In [None]:
# Normalize features: `features` is rebound to the scaled rows returned by
# normalize_features, so the unscaled weighted values are no longer available
# under this name afterwards.
features = normalize_features(features)

## clustering

### k-means

In [None]:
# Cluster every sampled pixel into 10 groups with k-means.
kmeans = KMeans(n_clusters=10)
flat_labels = kmeans.fit_predict(features)
# Fold the flat label vector back into one label per pixel, indexed as
# (image, row, column) like the first three axes of `random_values`.
clusters = flat_labels.reshape(random_values.shape[:3])

## display clustering

In [None]:
# Calculate the mean color for each cluster
def cluster_mean_value(kmeans, clusters, pixels=None):
    """Compute the rounded mean pixel value of every cluster.

    Args:
        kmeans: Object exposing ``n_clusters``, the number of cluster labels.
        clusters: Integer label array with one entry per pixel; its shape must
            match the leading axes of ``pixels``.
        pixels: Pixel array whose last axis holds the channels. Defaults to
            the module-level ``random_values`` used by the original code.

    Returns:
        Integer array of shape ``(n_clusters, channels)``. A cluster that owns
        no pixel gets an all-zero row instead of the NaN (0/0) that the
        previous masked-mean formula produced.
    """
    if pixels is None:
        pixels = random_values
    pixels = np.asarray(pixels)

    cluster_means = np.zeros((kmeans.n_clusters, pixels.shape[-1]))
    for cluster in range(kmeans.n_clusters):
        mask = (clusters == cluster)
        # Average only the member pixels; skipping empty clusters avoids a
        # division by zero and the invalid NaN -> int cast that would follow.
        if mask.any():
            cluster_means[cluster] = pixels[mask].mean(axis=0)

    # Convert the mean colors to integer values
    return np.round(cluster_means).astype(int)

In [None]:
# Mean pixel value of each k-means cluster, computed by cluster_mean_value.
clusters_mean = cluster_mean_value(kmeans, clusters)
# Disabled experiment (flattening the result to one dimension):
# clusters_mean.reshape(clusters_mean.shape[0]*clusters_mean.shape[1])

In [None]:
def display_clusters(cluster_means):
    """Show every sampled image next to its cluster-colored version.

    For each image in the module-level ``random_values`` two OpenCV windows
    are shown: ``'Original'`` and ``'Clustered'``, where every pixel is
    painted with the mean color of the cluster it belongs to according to the
    module-level ``clusters`` array. The function waits for a key press before
    moving on to the next image.

    Args:
        cluster_means: Sequence of colors; position ``k`` is the color used
            for cluster label ``k``.
    """
    try:
        for i, image in enumerate(random_values):
            cv2.imshow('Original', image)

            # Start from a black canvas; pixels whose label has no entry in
            # cluster_means stay black.
            cluster_image = np.zeros_like(image)

            # Assign the mean color to each pixel based on the cluster assignment
            for cluster, mean_color in enumerate(cluster_means):
                cluster_image[clusters[i] == cluster] = mean_color

            cv2.imshow('Clustered', cluster_image)
            cv2.waitKey(0)
    finally:
        # Close the windows even if displaying an image raised.
        cv2.destroyAllWindows()

display_clusters(clusters_mean)

## Extracting cluster features for each image separately

### mean color for each cluster in each image

In [None]:
import numpy as np

def calculate_mean(first_array, second_array):
    """Average ``second_array`` separately for every distinct label.

    Args:
        first_array: Label array; its distinct values define the groups.
        second_array: Value array indexed by the positions of each label.

    Returns:
        One entry per distinct label, in ascending label order: the mean over
        axis 0 of the values selected for that label, converted with
        ``tolist()``.
    """
    return [
        np.mean(second_array[np.where(first_array == label)], axis=0).tolist()
        for label in np.unique(first_array)
    ]

In [None]:
# Mean color of every cluster, computed separately for each sampled image.
# `clusters` was built from `random_values`, so each label map must be paired
# with the sampled image it describes - not with `images[i]`, which is a
# different picture of the full dataset.
mean_color_each_cluster_each_image = []
for cluster_map, sampled_image in zip(clusters, random_values):
    mean_values = calculate_mean(cluster_map, sampled_image)
    mean_color_each_cluster_each_image.append(np.array(mean_values))

# NOTE(review): if an image lacks some cluster, its array has fewer rows than
# the others and this conversion yields a ragged array - confirm that every
# image contains every cluster.
mean_color_each_cluster_each_image = np.array(mean_color_each_cluster_each_image)
print(len(mean_color_each_cluster_each_image))
mean_color_each_cluster_each_image

### variance color for each cluster in each image

In [None]:
import numpy as np

def calculate_variance(first_array, second_array):
    """Compute the variance of ``second_array`` for every distinct label.

    Args:
        first_array: Label array; its distinct values define the groups.
        second_array: Value array indexed by the positions of each label.

    Returns:
        One entry per distinct label, in ascending label order: the variance
        over axis 0 of the values selected for that label, converted with
        ``tolist()``.
    """
    per_label_variance = []
    for label in np.unique(first_array):
        members = second_array[np.where(first_array == label)]
        per_label_variance.append(np.var(members, axis=0).tolist())
    return per_label_variance


In [None]:
# Color variance of every cluster, computed separately for each sampled image.
var_color_each_cluster_each_image = np.array([
    np.array(calculate_variance(clusters[i], random_values[i]))
    for i in range(random_values.shape[0])
])
var_color_each_cluster_each_image

### size of clusters in each image

In [None]:
def calculate_size(clusters):
    """Count how many entries each label occupies in every image.

    Args:
        clusters: Label array whose first axis enumerates the images.

    Returns:
        Array with one row per image; each row holds the occurrence counts of
        the labels present in that image, in ascending label order.
    """
    counts_per_image = [
        np.unique(clusters[image_idx], return_counts=True)[1]
        for image_idx in range(clusters.shape[0])
    ]
    return np.array(counts_per_image)

In [None]:
# Per-image occurrence counts of every cluster label found in that image.
size_each_cluster_each_image = calculate_size(clusters)
# Bare expression: shows the value when run as a notebook cell.
size_each_cluster_each_image

# Part 2

## create clusters feature vectors 

In [None]:
def create_clusters_vector(clusters_mean, clusters_variance, clusters_size, clusters_area=None):
    """Build one feature row per cluster of every image.

    Args:
        clusters_mean: Per image, a sequence holding one mean vector per
            cluster.
        clusters_variance: Per image, a sequence holding one variance vector
            per cluster.
        clusters_size: Per image, a sequence holding one size per cluster.
        clusters_area: Unused; kept so existing callers keep working.

    Returns:
        A list of rows ``[*mean, *variance, size]``, one for each cluster of
        each image, in image order. ``size`` is that cluster's own size, kept
        as a length-1 array so that code which concatenates the last entry of
        a row onto the preceding entries keeps working.
    """
    result = []
    for mean, variance, size in zip(clusters_mean, clusters_variance, clusters_size):
        # Walk over every cluster of the image (not just the first three) and
        # pair it with its own size rather than the image's whole size array.
        for cluster_mean, cluster_variance, cluster_size in zip(mean, variance, size):
            result.append([*cluster_mean, *cluster_variance, np.atleast_1d(cluster_size)])
    return result

In [None]:
def flatten_nested_list(nested_list):
    """Flatten one level of list nesting inside every row.

    Args:
        nested_list: Iterable of rows; each row is an iterable whose entries
            may themselves be lists.

    Returns:
        A list of rows in which every entry that was a ``list`` has been
        replaced by its items; all other entries are kept unchanged.
    """
    flattened_rows = []
    for row in nested_list:
        flat_row = []
        for entry in row:
            if isinstance(entry, list):
                flat_row.extend(entry)
            else:
                flat_row.append(entry)
        flattened_rows.append(flat_row)
    return flattened_rows

In [None]:
# Combine the per-image mean, variance and size statistics into feature rows.
clusters_features = create_clusters_vector(mean_color_each_cluster_each_image, var_color_each_cluster_each_image, size_each_cluster_each_image)
# Expand entries that are Python lists into their items (one level only).
clusters_features = flatten_nested_list(clusters_features)
# Bare expression: shows the value when run as a notebook cell.
clusters_features

## clustering clusters

In [None]:
# Second-stage clustering: group the cluster feature rows into 3 groups.
kmeans_2 = KMeans(n_clusters=3)
# Turn every row into one flat numeric vector; np.hstack accepts rows whose
# last entry is either an array or a plain scalar.
clusters_features_flat = [np.hstack(item) for item in clusters_features]
# Fit the dedicated 3-cluster model. Calling `kmeans.fit_predict` here would
# refit the first-stage model instead and leave `kmeans_2` unused.
clusters_2 = kmeans_2.fit_predict(clusters_features_flat)
clusters_2

## Display clustered clusters

In [None]:
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
# Reduce the cluster feature vectors to 2 components so they can be drawn
# on a plane.
pca = PCA(2)
data_transformed = pca.fit_transform(clusters_features_flat)

# Scatter plot of the two components, colored by the labels in `clusters_2`.
plt.scatter(data_transformed[:, 0], data_transformed[:, 1], c=clusters_2)
plt.show()
