In [None]:
import pickle
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.cluster import KMeans
import cv2

In [None]:
def load_dataset(image_pickle_file_path='images.pkl', label_pickle_file_path='label.pkl'):
    """Load the pickled images and labels from disk.

    Parameters
    ----------
    image_pickle_file_path : str or path-like, optional
        Pickle file holding the images. Defaults to ``'images.pkl'``.
    label_pickle_file_path : str or path-like, optional
        Pickle file holding the labels. Defaults to ``'label.pkl'``.

    Returns
    -------
    tuple
        ``(images, labels)`` exactly as they were unpickled; no reshaping or
        validation is applied.

    Raises
    ------
    OSError
        If either file cannot be opened (e.g. ``FileNotFoundError``).
    """
    # SECURITY: pickle.load can execute arbitrary code stored in the file.
    # Only point this function at pickle files from a trusted source.
    with open(image_pickle_file_path, 'rb') as file:
        images = pickle.load(file)

    with open(label_pickle_file_path, 'rb') as file:
        labels = pickle.load(file)

    return images, labels

In [None]:
# Number of leading samples kept for the rest of the notebook.
N_IMAGES = 20

images, labels = load_dataset()

# Truncate images and labels together so both keep the same length.
# NOTE(review): this assumes labels[k] belongs to images[k] — confirm against
# how the pickle files were produced.
images = images[:N_IMAGES]
labels = labels[:N_IMAGES]

In [None]:
from sklearn.preprocessing import StandardScaler

def extract_features(images):
    """Build one weighted ``(h, s, v, x, y)`` tuple for every pixel.

    Each image is converted from BGR to HSV with OpenCV. The three HSV
    channel values are multiplied by the combined colour weight
    (``0.2 + 0.2 + 0.1``) and the raw row / column indices by the
    complementary spatial weight (``1 - colour weight``).

    Parameters
    ----------
    images : iterable of arrays
        Every item must have a three-element ``shape`` and be accepted by
        ``cv2.cvtColor(..., cv2.COLOR_BGR2HSV)``.

    Returns
    -------
    list of tuple
        One 5-tuple per pixel, images in input order and pixels in
        row-major order within each image.
    """
    # NOTE(review): the individual channel weights only enter through their
    # sum, and the spatial indices are used unnormalised — confirm intended.
    h_weight, s_weight, v_weight = 0.2, 0.2, 0.1
    color_weight = h_weight + s_weight + v_weight
    spatial_weight = 1 - color_weight

    collected = []
    for picture in images:
        # Unpacking the shape rejects anything that is not a 3-D array
        # before OpenCV is called.
        _n_rows, _n_cols, _ = picture.shape
        hsv_picture = cv2.cvtColor(picture, cv2.COLOR_BGR2HSV)

        # Visit every (row, col) position in row-major order.
        for row, col in np.ndindex(hsv_picture.shape[0], hsv_picture.shape[1]):
            hue, sat, val = hsv_picture[row, col]
            collected.append((
                hue * color_weight,
                sat * color_weight,
                val * color_weight,
                row * spatial_weight,
                col * spatial_weight,
            ))

    return collected

In [None]:
def process_images(images):
    """Build one weighted ``(r, g, b, x, y)`` tuple for every pixel.

    The colour channels are read straight from the BGR input (no HSV
    conversion: the previous version converted to HSV and then unpacked the
    HSV pixel as if it were BGR). Channels are scaled by ``1 / 255`` and
    multiplied by the combined colour weight; the row / column indices are
    divided by the image height / width and multiplied by the complementary
    spatial weight.

    Parameters
    ----------
    images : iterable of arrays
        Each item must have shape ``(height, width, 3)`` in BGR channel
        order. Dividing by 255 presumes 8-bit channel values — confirm
        against the dataset.

    Returns
    -------
    list of tuple
        One 5-tuple of floats per pixel, images in input order and pixels in
        row-major order within each image.

    Raises
    ------
    ValueError
        If an image is not three-dimensional or does not have exactly three
        channels.
    """
    # NOTE(review): the individual channel weights only enter through their
    # sum, i.e. every channel gets the same factor — confirm intended.
    red_weight = 0.2
    blue_weight = 0.2
    green_weight = 0.05
    color_weight = red_weight + green_weight + blue_weight
    spatial_weight = 1 - color_weight

    features = []
    for img in images:
        height, width, _ = img.shape
        # The reshape doubles as a check that there are exactly 3 channels.
        bgr = np.asarray(img).reshape((height, width, 3)).astype(np.float64)

        # Reverse the channel axis (BGR -> RGB), scale, then weight.
        rgb = bgr[..., ::-1] / 255 * color_weight

        # Row / column index of every pixel, normalised and weighted.
        row_idx, col_idx = np.indices((height, width))
        xs = row_idx / height * spatial_weight
        ys = col_idx / width * spatial_weight

        per_pixel = np.column_stack((rgb.reshape(-1, 3), xs.ravel(), ys.ravel()))
        features.extend(map(tuple, per_pixel.tolist()))

    return features

In [None]:
def process_images2(images):
    """Build one weighted ``(h, s, v, x, y)`` tuple for every pixel.

    Each image is converted from BGR to HSV. For every pixel the three HSV
    values are min-max rescaled to ``[0, 1]`` using that pixel's own minimum
    and maximum channel value, then multiplied by the per-channel weights
    (hue 0.4, saturation 0.3, value 0.1). The row / column indices are
    divided by the image height / width and multiplied by the remaining
    spatial weight.

    Pixels whose three channels are all equal (e.g. pure black) have a zero
    min-max range; their colour features are set to the lower bound (0)
    instead of the NaN that a 0 / 0 division would produce.

    Parameters
    ----------
    images : iterable of arrays
        Each item must have shape ``(height, width, 3)`` and be accepted by
        ``cv2.cvtColor(..., cv2.COLOR_BGR2HSV)``.

    Returns
    -------
    list of tuple
        One 5-tuple of floats per pixel, images in input order and pixels in
        row-major order within each image.

    Raises
    ------
    ValueError
        If an image is not three-dimensional or does not have exactly three
        channels.
    """
    hue_weight = 0.4
    saturation_weight = 0.3
    value_weight = 0.1
    color_weight = hue_weight + saturation_weight + value_weight
    spatial_weight = 1 - color_weight
    channel_weights = np.array([hue_weight, saturation_weight, value_weight])

    # Target range of the per-pixel min-max rescaling.
    new_min, new_max = 0, 1

    features = []
    for img in images:
        height, width, _ = img.shape
        # The reshape doubles as a check that there are exactly 3 channels.
        img = img.reshape((height, width, 3))
        img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV).astype(np.float64)

        # NOTE(review): min / max are taken across the three channels of a
        # single pixel, so the largest channel always maps to new_max and the
        # smallest to new_min — confirm this is the intended normalisation.
        lowest = img_hsv.min(axis=2, keepdims=True)
        spread = img_hsv.max(axis=2, keepdims=True) - lowest
        # Avoid 0 / 0: with a zero range the numerator is 0 as well, so any
        # non-zero divisor yields new_min.
        safe_spread = np.where(spread == 0, 1.0, spread)
        normalised = (img_hsv - lowest) * (new_max - new_min) / safe_spread + new_min
        weighted = normalised * channel_weights

        # Row / column index of every pixel, normalised and weighted.
        row_idx, col_idx = np.indices((height, width))
        xs = row_idx / height * spatial_weight
        ys = col_idx / width * spatial_weight

        per_pixel = np.column_stack((weighted.reshape(-1, 3), xs.ravel(), ys.ravel()))
        features.extend(map(tuple, per_pixel.tolist()))

    return features


In [None]:
# Extract the weighted per-pixel feature tuples for the selected images.
features = process_images2(images)

# The list holds one tuple per pixel, so show only its size and a short
# sample instead of dumping everything into the cell output.
print(f"{len(features)} feature vectors, first 5:", features[:5])
print(type(features[0]) if features else None)

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardise every feature column (one row per feature vector) to zero mean
# and unit variance.
# NOTE(review): per-column standardisation cancels any constant per-column
# scale factor, so the colour / spatial weights applied in process_images2
# no longer influence the values after this step — confirm this is intended.
scaler = StandardScaler().fit(features)
features = scaler.transform(features)


In [None]:
def classify(datapoints, labels, n_clusters=100, test_size=0.2, random_state=42):
    """Cluster the data with KMeans and report a label-based accuracy.

    The samples and their labels are split together into train / test
    parts. KMeans is fitted on the training part, every cluster is given the
    most frequent training label among its members, and the test samples are
    scored by the label of the cluster they are assigned to.

    Parameters
    ----------
    datapoints : array-like
        One sample per entry along the first axis; any further axes are
        flattened into a single feature vector per sample.
    labels : array-like
        One label per sample, same length as ``datapoints``.
        NOTE(review): assumes a 1-D vector of class labels — confirm.
    n_clusters : int, optional
        Requested number of clusters (default 100). Capped at the number of
        training samples, because KMeans cannot fit more clusters than
        samples.
    test_size : float, optional
        Fraction of samples held out for testing (default 0.2).
    random_state : int, optional
        Seed for both the split and KMeans (default 42).

    Returns
    -------
    numpy.ndarray
        ``kmeans.labels_``: the cluster ID of every training sample.
    """
    samples = np.asarray(datapoints)
    samples = samples.reshape(len(samples), -1)  # KMeans needs 2-D input
    targets = np.asarray(labels)

    # Split samples and labels together so y_test lines up with X_test.
    X_train, X_test, y_train, y_test = train_test_split(
        samples, targets, test_size=test_size, random_state=random_state
    )

    n_clusters = min(n_clusters, len(X_train))
    kmeans = KMeans(n_clusters=n_clusters, random_state=random_state)
    kmeans.fit(X_train)

    def _majority(values):
        """Return the most frequent entry of ``values``."""
        uniques, counts = np.unique(values, return_counts=True)
        return uniques[np.argmax(counts)]

    # Cluster IDs are arbitrary integers, so translate each one into the
    # dominant training label before comparing against the true labels.
    fallback = _majority(y_train)
    cluster_to_label = {
        cluster_id: _majority(y_train[kmeans.labels_ == cluster_id])
        for cluster_id in np.unique(kmeans.labels_)
    }
    y_pred = np.array(
        [cluster_to_label.get(cluster_id, fallback) for cluster_id in kmeans.predict(X_test)]
    )

    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy: {accuracy * 100:.2f}%")

    return kmeans.labels_

In [None]:
# Run the image-level KMeans experiment: every image is one sample.
# NOTE(review): `clusters` is rebound by the pixel-level KMeans cell further
# down (fit_predict on `features`), so this result is not used afterwards in
# the visible part of the notebook.
clusters = classify(images, labels)

In [None]:
from sklearn.cluster import KMeans

# Cluster the per-pixel feature vectors into 5 groups.
# The seed is fixed so that cluster IDs — and therefore the colour each ID is
# painted with in the visualisation cell — are reproducible across runs.
kmeans = KMeans(n_clusters=5, random_state=42)
clusters = kmeans.fit_predict(features)


In [None]:
# Predict a cluster ID for every feature vector, then fold the flat result
# back into one 2-D map per image using the first three dimensions of
# `images`.
# NOTE(review): this rebinds `labels`, replacing the labels loaded from disk.
grid_shape = (images.shape[0], images.shape[1], images.shape[2])
labels = kmeans.predict(features).reshape(grid_shape)

In [None]:
# Per-cluster summary. `clusters` must hold one cluster ID per row of
# `features`.
for cluster_id in np.unique(clusters):
    # Rows of `features` that belong to this cluster.
    in_cluster = clusters == cluster_id
    cluster_features = features[in_cluster]

    # Columns 3-4 are the spatial features: mean Euclidean distance of the
    # members from their own centroid.
    positions = cluster_features[:, 3:5]
    offsets = positions - positions.mean(axis=0)
    avg_distance = np.mean(np.sqrt(np.sum(offsets ** 2, axis=1)))

    # Columns 0-2 are the colour features: standard deviation per column.
    color_std = np.std(cluster_features[:, :3], axis=0)

    print(f"Cluster {cluster_id}: average distance = {avg_distance}, color standard deviation = {color_std}")


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Make sure both inputs are NumPy arrays so boolean-mask indexing works
# (asarray avoids copying when they already are).
clusters = np.asarray(clusters)
features = np.asarray(features)

# Scatter the spatial part of every feature vector (columns 3 and 4), with
# one colour and legend entry per cluster.
fig, ax = plt.subplots()
for cluster_id in np.unique(clusters):
    # Rows of `features` that belong to this cluster.
    cluster_features = features[clusters == cluster_id]
    ax.scatter(cluster_features[:, 3], cluster_features[:, 4], label=f"Cluster {cluster_id}")

# A figure should be readable on its own when the notebook is skimmed.
ax.set_xlabel("Feature column 3 (spatial x)")
ax.set_ylabel("Feature column 4 (spatial y)")
ax.set_title("Spatial distribution of pixel clusters")
ax.legend()

plt.show()


In [None]:
# Colour painted for each cluster ID, indexed by ID; the last row is used for
# every ID outside 0-3. cv2.imshow interprets the triples as BGR.
palette = np.array([
    [0, 0, 0],
    [0, 0, 255],
    [0, 255, 0],
    [255, 0, 0],
    [255, 255, 255],
])
fallback_index = len(palette) - 1

# NOTE(review): cv2.imshow opens a native GUI window, which is not available
# in headless / hosted notebook kernels.
for i, image in enumerate(images[:10]):
    cluster_map = labels[i]
    palette_index = np.where(
        (cluster_map >= 0) & (cluster_map < fallback_index), cluster_map, fallback_index
    )
    # Build a fresh array instead of painting over `image`: when `images` is a
    # NumPy array, `image` is a view into it, so writing to it would destroy
    # the source pixels and make re-running earlier cells give different
    # results.
    segmented = palette[palette_index].astype(image.dtype)
    cv2.imshow('Image', segmented)
    cv2.waitKey(0)
cv2.destroyAllWindows()
