## imports

In [None]:
import pickle
import numpy as np
from sklearn.cluster import KMeans
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.cluster import DBSCAN
import cv2
import random

# Part 1

## load dataset

In [None]:
def load_dataset(image_pickle_file_path='images.pkl', label_pickle_file_path='label.pkl'):
    """Load the pickled images and labels from disk.

    Args:
        image_pickle_file_path: Path of the pickle file holding the images.
            Defaults to ``'images.pkl'`` in the working directory.
        label_pickle_file_path: Path of the pickle file holding the labels.
            Defaults to ``'label.pkl'`` in the working directory.

    Returns:
        A ``(images, labels)`` tuple containing the two unpickled objects
        exactly as they were stored.

    Raises:
        OSError: If either file cannot be opened.
    """
    # NOTE(security): pickle.load can execute arbitrary code embedded in the
    # file, so only point this function at files from a trusted source.
    with open(image_pickle_file_path, 'rb') as file:
        images = pickle.load(file)

    with open(label_pickle_file_path, 'rb') as file:
        labels = pickle.load(file)

    return images, labels

In [None]:
images, labels = load_dataset()
# Draw up to 15 distinct image indices. The population is the real dataset
# size rather than a hard-coded 560, so the sample can neither index past the
# end of a smaller dataset nor ignore the tail of a larger one.
random_indices = np.random.choice(len(images), size=min(15, len(images)), replace=False)
# Extract the randomly selected images.
# NOTE(review): index-array selection assumes `images` is a NumPy array - confirm.
random_values = images[random_indices]


## Process images
- Extract features
- Normalize features
- Flatten the features into a one-dimensional vector

In [None]:
from dataclasses import dataclass


@dataclass
class Image:
    """Container bundling one image with its derived features and clusters.

    All three attributes default to ``None`` and can be supplied as keyword
    arguments, e.g. ``Image(image=img)``.
    """

    # The annotations are what turn these names into dataclass fields; without
    # them @dataclass generates an __init__ with no parameters and an __eq__
    # that treats every instance as equal.
    image: object = None
    features: object = None
    clusters: object = None

class ImageProccess:
    """Skeleton for per-image processing; both methods are still unimplemented."""

    image_: Image

    def extract_features(self):
        """Placeholder with no behaviour yet; returns None."""

    def normalize_features(self):
        """Placeholder with no behaviour yet; returns None."""


In [None]:
def extract_features(image, weights=(10, 1, 1, 0, 0)):
    """Build one (h, s, v, x, y) feature tuple per pixel of ``image``.

    The image is converted with ``cv2.COLOR_BGR2HSV``; ``x`` is the row index
    and ``y`` the column index of the pixel. Pixels are emitted in row-major
    order (all columns of row 0, then row 1, ...).

    Args:
        image: Image accepted by ``cv2.cvtColor(..., cv2.COLOR_BGR2HSV)``.
        weights: Multipliers applied to (h, s, v, x, y) respectively. The
            default emphasises hue and zeroes out the pixel position.

    Returns:
        ``(features, original_features)``: two lists of 5-tuples of Python
        numbers, the first weighted and the second unweighted.
    """
    img_hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    rows, cols = img_hsv.shape[:2]

    # tolist() yields plain Python numbers, so the weighting below cannot wrap
    # around the way fixed-width NumPy scalars can (e.g. 10 * hue stored as
    # uint8 under NumPy 2 promotion rules).
    hsv_pixels = img_hsv.reshape(-1, 3).tolist()
    coordinates = ((x, y) for x in range(rows) for y in range(cols))

    original_features = [
        (h, s, v, x, y) for (h, s, v), (x, y) in zip(hsv_pixels, coordinates)
    ]

    h_weight, s_weight, v_weight, x_weight, y_weight = weights
    features = [
        (h * h_weight, s * s_weight, v * v_weight, x * x_weight, y * y_weight)
        for h, s, v, x, y in original_features
    ]

    return features, original_features

In [None]:
def normalize_features(features):
    """Scale ``features`` with a fresh ``StandardScaler`` and return nested lists.

    The scaler is fitted on exactly the rows passed in, so each call is
    normalized independently of any other call.
    """
    standardized = StandardScaler().fit_transform(np.array(features))
    return standardized.tolist()

In [None]:
# def process_images_in_chunks(images, chunk_size):
#     features = []
#     original_features = []
#     num_images = len(images)

#     for i in range(0, num_images, chunk_size):
#         # Extract features for a chunk of images
#         chunk_features, chunk_original_features = extract_features(images[i:i+chunk_size])

#         # Append the features to the overall features list
#         features.extend(chunk_features)
#         original_features.extend(chunk_original_features)

#         print('<<', i, '>>')

#     return features, original_features


In [None]:
# Extract features for every sampled image.
# Earlier chunked variant, kept for reference:
# chunk_size = 10
# features, original_features = process_images_in_chunks(images, chunk_size)

# new_features[k] holds the weighted per-pixel feature tuples of random_values[k].
new_features = []

for img in random_values:
    # Only the weighted features are collected; after the loop `features` and
    # `original_features` refer to the last image processed.
    features, original_features = extract_features(img)
    new_features.append(features)
    

In [None]:
# Normalize every image's feature list independently, updating the existing
# new_features list in place.
new_features[:] = [normalize_features(fimg) for fimg in new_features]

## clustering

### k-means

In [None]:
# A single 10-cluster estimator is reused; fit_predict is called once per image
# on that image's own pixel features.
kmeans = KMeans(n_clusters=10)
new_clusters = []
for f in new_features:
    # `clusters` stays the flat label vector of the most recent image.
    clusters = kmeans.fit_predict(f)
    # Store the labels folded back to the two leading per-image dimensions of
    # random_values (presumably rows x columns).
    new_clusters.append(clusters.reshape(random_values.shape[1], random_values.shape[2]))

In [None]:
# Notebook sanity check: show the shape of the first image's label map.
new_clusters[0].shape

## display clustering

In [None]:
# Calculate the mean color for each cluster
# def cluster_mean_value(kmeans, clusters):
#     cluster_means = []
#     for i, cluster in enumerate(clusters):
#         mask = (clusters[i] == i)
#         cluster_mean = np.mean(random_values[0] * mask[:, :, np.newaxis], axis=(0, 1, 2)) / np.mean(mask)
#         cluster_means.append(cluster_mean)
#     # Convert the mean colors to integer values
#     cluster_means = np.round(cluster_means).astype(int)
#     return cluster_means

def cluster_mean_value(clusters, image=None, n_clusters=None):
    """Return the mean colour of every cluster label in one image.

    Args:
        clusters: Label map of a single image, one label per pixel.
        image: The image the labels belong to, with one leading axis per
            axis of ``clusters`` plus a trailing channel axis. When omitted,
            ``clusters`` must be one of the entries of the module-level
            ``new_clusters`` list; the image at the same position in
            ``random_values`` is then used.
        n_clusters: Number of rows to return. Defaults to the largest label
            present plus one.

    Returns:
        Integer array of shape ``(n_clusters, channels)``; row ``k`` is the
        rounded mean colour of the pixels labelled ``k``. Labels that do not
        occur in ``clusters`` get an all-zero row.

    Raises:
        ValueError: If ``image`` is omitted and ``clusters`` is not an entry
            of ``new_clusters``.
    """
    if image is None:
        # One-argument calls (as in `for nc in new_clusters`) carry no image,
        # so recover it from the position of this label map in new_clusters.
        positions = [idx for idx, label_map in enumerate(new_clusters)
                     if label_map is clusters]
        if not positions:
            raise ValueError(
                "cluster_mean_value: pass `image` explicitly when `clusters` "
                "is not an entry of new_clusters")
        image = random_values[positions[0]]

    labels = np.asarray(clusters)
    # Work in float so the averaged values are not limited by the image dtype.
    pixels = np.asarray(image, dtype=float)

    if n_clusters is None:
        n_clusters = int(labels.max()) + 1 if labels.size else 0

    cluster_means = np.zeros((n_clusters, pixels.shape[-1]))
    for cluster_id in range(n_clusters):
        mask = labels == cluster_id
        # Skip absent labels instead of dividing by zero.
        if mask.any():
            cluster_means[cluster_id] = pixels[mask].mean(axis=0)

    # Convert the mean colours to integer values.
    return np.round(cluster_means).astype(int)

In [None]:
# Collect, for every image's label map, the value returned by cluster_mean_value.
new_cluster_mean = []
for nc in new_clusters:
    # After the loop `clusters_mean` refers to the result for the last image.
    clusters_mean = cluster_mean_value(nc)
    new_cluster_mean.append(clusters_mean)
# clusters_mean.reshape(clusters_mean.shape[0]*clusters_mean.shape[1])

In [None]:
def display_clusters(cluster_means):
    """Show each sampled image next to its cluster-coloured version.

    Iterates over the module-level ``random_values`` and ``new_clusters``;
    after showing a pair of windows it waits for a key press before moving on.

    Args:
        cluster_means: One entry per image in ``random_values``. Entry ``i``
            is a sequence of colours where position ``k`` is the colour
            painted onto the pixels of image ``i`` labelled ``k``.
    """
    try:
        for i, image in enumerate(random_values):
            cv2.imshow('Original', image)

            # Blank canvas with the same shape and dtype as the original.
            cluster_image = np.zeros_like(image)

            # Use this image's own palette: enumerating `cluster_means` itself
            # would walk over images rather than over cluster labels.
            for cluster, mean_color in enumerate(cluster_means[i]):
                mask = (new_clusters[i] == cluster)
                cluster_image[mask] = mean_color

            cv2.imshow('Clustered', cluster_image)
            cv2.waitKey(0)
    finally:
        # Close the windows even if displaying one of the images fails.
        cv2.destroyAllWindows()


# Show every sampled image together with its cluster-coloured counterpart.
display_clusters(new_cluster_mean)

## Extracting cluster features for each image separately

### mean color for each cluster in each image

In [None]:
def mean_find_matching_triads(array1, array2):
    """Group the rows of ``array2`` by the matching label in ``array1``.

    Prints both input shapes and the distinct labels, then returns one list
    of rows (as nested Python lists) per distinct label, ordered as
    ``np.unique`` orders the labels.
    """
    print(array1.shape, array2.shape)

    labels = np.unique(array1)

    print(labels)

    return [
        array2[np.nonzero(array1 == label)[0], :].tolist()
        for label in labels
    ]

In [None]:
def calculate_mean(data):
    """Average ``data`` along its first axis and return the result cast to int.

    The cast uses ``astype(int)`` and the result is converted with ``tolist()``.
    """
    return np.mean(np.array(data), axis=0).astype(int).tolist()



In [None]:
def mean_forward_each_image(clusters, random_values):
    """Compute, per image, the integer mean pixel value of every cluster.

    ``clusters`` is flattened to one label row per image and ``random_values``
    to one row of pixels per image. For each image the pixels are grouped by
    label and averaged; the number of groups is printed along the way.

    Returns:
        A list with one entry per image; each entry is a list holding one
        mean vector per distinct label found in that image.
    """
    flat_labels = clusters.reshape(clusters.shape[0],
                                   clusters.shape[1] * clusters.shape[2])
    flat_pixels = random_values.reshape(random_values.shape[0],
                                        random_values.shape[1] * random_values.shape[2],
                                        random_values.shape[3])

    mean_values = []
    for image_index in range(random_values.shape[0]):
        pixel_groups = mean_find_matching_triads(flat_labels[image_index],
                                                 flat_pixels[image_index])
        print(len(pixel_groups))
        mean_values.append([calculate_mean(group) for group in pixel_groups])

    return mean_values


In [None]:
# Stack the per-image label maps into one array. The bare `clusters` variable
# only holds the flat labels of the last image clustered, which has no second
# or third dimension for mean_forward_each_image to reshape.
mean_color_each_cluster_each_image = mean_forward_each_image(np.array(new_clusters), random_values)

### variance color for each cluster in each image

In [None]:
def var_find_matching_triads(array1, array2):
    """Group the rows of ``array2`` by the matching label in ``array1``.

    Returns one list of rows (as nested Python lists) per distinct label,
    ordered as ``np.unique`` orders the labels.
    """
    grouped_rows = []
    for label in np.unique(array1):
        row_positions = np.nonzero(array1 == label)[0]
        grouped_rows.append(array2[row_positions, :].tolist())
    return grouped_rows

In [None]:
def calculate_var(data):
    """Take the variance of ``data`` along its first axis, cast to int.

    The cast uses ``astype(int)`` and the result is converted with ``tolist()``.
    """
    return np.var(np.array(data), axis=0).astype(int).tolist()



In [None]:
def var_forward_each_image(clusters, random_values):
    """Compute, per image, the integer pixel variance of every cluster.

    ``clusters`` is flattened to one label row per image and ``random_values``
    to one row of pixels per image. For each image the pixels are grouped by
    label and the variance of each group is taken.

    Returns:
        A list with one entry per image; each entry is a list holding one
        variance vector per distinct label found in that image.
    """
    flat_labels = clusters.reshape(clusters.shape[0],
                                   clusters.shape[1] * clusters.shape[2])
    flat_pixels = random_values.reshape(random_values.shape[0],
                                        random_values.shape[1] * random_values.shape[2],
                                        random_values.shape[3])

    var_values = []
    for image_index in range(random_values.shape[0]):
        pixel_groups = var_find_matching_triads(flat_labels[image_index],
                                                flat_pixels[image_index])
        var_values.append([calculate_var(group) for group in pixel_groups])

    return var_values


In [None]:
# Stack the per-image label maps into one array. The bare `clusters` variable
# only holds the flat labels of the last image clustered, which has no second
# or third dimension for var_forward_each_image to reshape.
var_color_each_cluster_each_image = var_forward_each_image(np.array(new_clusters), random_values)
# Prints True for every image that did not end up with exactly 10 clusters.
for i in var_color_each_cluster_each_image:
    print(len(i) != 10)

### size of clusters in each image

In [None]:
def calculate_size(clusters):
    """Count how many entries carry each distinct label, image by image.

    Returns a list with one count array per entry along the first axis of
    ``clusters``; counts follow the order in which ``np.unique`` reports the
    labels present in that entry.
    """
    return [
        np.unique(clusters[index], return_counts=True)[1]
        for index in range(clusters.shape[0])
    ]

In [None]:
# Stack the per-image label maps so sizes are counted per image. The bare
# `clusters` variable is the flat label vector of the last image only, so
# iterating its first axis would visit single pixels instead of images.
size_each_cluster_each_image = calculate_size(np.array(new_clusters))
size_each_cluster_each_image

# Part 2

## create clusters feature vectors 

In [None]:
def create_clusters_vector(clusters_mean, clusters_variance, n_clusters=10):
    """Build one feature row per cluster from per-image means and variances.

    Args:
        clusters_mean: One entry per image; each entry is a sequence with one
            mean vector per cluster.
        clusters_variance: Same layout as ``clusters_mean``, holding the
            variance vectors.
        n_clusters: Number of clusters every image must have. Defaults to 10.

    Returns:
        A flat list of rows, each being a cluster's mean values followed by
        its variance values. Images whose mean or variance entry does not
        contain exactly ``n_clusters`` clusters are skipped (after printing
        a marker line) and contribute no rows.
    """
    result = []
    for mean, variance in zip(clusters_mean, clusters_variance):
        if len(mean) != n_clusters or len(variance) != n_clusters:
            print("Jib")
            continue
        result.extend(
            [*cluster_mean, *cluster_variance]
            for cluster_mean, cluster_variance in zip(mean, variance)
        )
    return result

In [None]:
def flatten_nested_list(nested_list):
    """Flatten each entry of ``nested_list`` by one level.

    Within every entry, elements that are ``list`` instances are spliced in
    place; all other elements (including tuples) are kept unchanged.
    """
    flattened = []
    for lst in nested_list:
        row = []
        for element in lst:
            if isinstance(element, list):
                row.extend(element)
            else:
                row.append(element)
        flattened.append(row)
    return flattened

In [None]:
# One row per cluster: that cluster's mean values followed by its variance values.
clusters_features = create_clusters_vector(mean_color_each_cluster_each_image, var_color_each_cluster_each_image)
# clusters_features = flatten_nested_list(clusters_features)
# Array copy of the rows, shown as the cell output for inspection.
clusters_featuresss = np.array(clusters_features)
clusters_featuresss

## clustering clusters

In [None]:
kmeans_2 = KMeans(n_clusters=3)
# np.hstack flattens every row whether it is already a flat list of numbers
# or still ends with a nested sequence; np.concatenate rejects the scalar
# elements of an already-flat row.
clusters_features_flat = np.array([np.hstack(item) for item in clusters_features])
# Fit the 3-cluster estimator created above, not the 10-cluster `kmeans`
# that was used for the per-pixel clustering.
clusters_2 = kmeans_2.fit_predict(clusters_features_flat)
clusters_2

## Display clustered clusters

In [None]:
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

# Reduce the cluster feature rows to two components so they can be plotted.
pca = PCA(n_components=2)
data_transformed = pca.fit_transform(clusters_features_flat)

# First component on the x axis, second on the y axis, coloured by the
# second-level cluster assignment.
plt.scatter(*data_transformed.T, c=clusters_2)
plt.show()
