In [6]:
import os
import shutil

import cv2
import numpy as np
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.models import Model
from sklearn.cluster import KMeans
from tensorflow.keras.preprocessing.image import img_to_array, load_img

def extract_color_histogram(image, bins=(8, 8, 8)):
    """Compute a flattened, normalized 3-D HSV color histogram of an image.

    Args:
        image: Image array; it is converted with ``cv2.COLOR_BGR2HSV``, so it
            is expected to be in BGR channel order (what ``cv2.imread`` returns).
        bins: Number of histogram bins for the H, S and V channels.

    Returns:
        1-D array holding ``bins[0] * bins[1] * bins[2]`` histogram values.

    Raises:
        ValueError: If ``image`` is ``None`` or contains no pixels.
    """
    # cv2.imread signals an unreadable file by returning None rather than
    # raising; fail here with a clear message instead of deep inside OpenCV.
    if image is None or image.size == 0:
        raise ValueError(
            "image is None or empty; cv2.imread returns None when a file "
            "cannot be read or decoded"
        )

    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    # Histogram ranges passed per channel: hue 0-180, saturation and value 0-256.
    hist = cv2.calcHist([hsv], [0, 1, 2], None, bins, [0, 180, 0, 256, 0, 256])
    # Normalizes `hist` in place using cv2.normalize's default settings.
    cv2.normalize(hist, hist)
    return hist.flatten()

def extract_deep_features(image_path, model):
    """Return the flattened prediction of ``model`` for one image file.

    The file at ``image_path`` is loaded at 224x224, converted to an array,
    wrapped into a batch of one, passed through ``preprocess_input`` and fed
    to ``model.predict``.

    Args:
        image_path: Path of the image file to load.
        model: Object exposing ``predict``; called with the single-image batch.

    Returns:
        The prediction flattened to a 1-D array.
    """
    pixels = img_to_array(load_img(image_path, target_size=(224, 224)))
    # The model is fed batches, so add a leading batch axis of length one.
    batch = preprocess_input(np.expand_dims(pixels, axis=0))
    return model.predict(batch).flatten()

# Load VGG16 with ImageNet weights and use the output of its 'fc1' layer as
# the deep feature vector.
base_model = VGG16(weights='imagenet')
model = Model(inputs=base_model.input, outputs=base_model.get_layer('fc1').output)

# Extract color-histogram and deep features from every readable image in a folder
image_folder = 'h:\\Yehmh\\DNDF\\101_1_focus\\202404_101_seg_images_known'
color_features = []
deep_features = []
image_paths = []
skipped_paths = []  # files that OpenCV could not decode

# os.listdir returns entries in arbitrary order; sorting keeps the row order of
# the feature matrices (and of image_paths) reproducible between runs.
for image_name in sorted(os.listdir(image_folder)):
    image_path = os.path.join(image_folder, image_name)
    if not os.path.isfile(image_path):
        continue  # sub-directories are not images

    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None (it does not raise) for files it cannot
        # decode; skip them so one stray file does not abort the whole run.
        skipped_paths.append(image_path)
        continue

    color_hist = extract_color_histogram(image)
    deep_feat = extract_deep_features(image_path, model)

    # Append all three together so the lists stay index-aligned.
    color_features.append(color_hist)
    deep_features.append(deep_feat)
    image_paths.append(image_path)

if skipped_paths:
    print(f'Skipped {len(skipped_paths)} unreadable file(s): {skipped_paths}')

color_features = np.array(color_features)
deep_features = np.array(deep_features)




In [7]:
# Combine features (you can choose to use only color or deep features or both)
features = np.hstack([color_features, deep_features])

# Standardize each feature column so that neither feature family dominates
# the distance computation just because of its scale.
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
features = scaler.fit_transform(features)

# Clustering
num_clusters = 6  # Choose the number of clusters
# n_init is passed explicitly: leaving it unset makes scikit-learn emit a
# warning about its changing default. 10 is the value that was in effect
# (default_n_init=10), so results stay the same across library versions.
kmeans = KMeans(n_clusters=num_clusters, n_init=10, random_state=42)
labels = kmeans.fit_predict(features)


  super()._check_params_vs_input(X, default_n_init=10)


In [8]:
# Create one folder per cluster and copy every image into its cluster's folder
output_folder = 'h:\\Yehmh\\DNDF\\101_1_focus\\202404_101_seg_images_known_clustered'
os.makedirs(output_folder, exist_ok=True)

for i in range(num_clusters):
    cluster_folder = os.path.join(output_folder, f'cluster_{i}')
    os.makedirs(cluster_folder, exist_ok=True)

for label, image_path in zip(labels, image_paths):
    image_name = os.path.basename(image_path)
    cluster_folder = os.path.join(output_folder, f'cluster_{label}')
    # Copy the file itself instead of decoding and re-encoding it with OpenCV:
    # the copy is byte-identical (no recompression) and a failure raises
    # instead of being reported only through an ignored return value.
    shutil.copy2(image_path, os.path.join(cluster_folder, image_name))
