In [1]:
import os
import cv2
import numpy as np
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
import random
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Path to the folder containing your images
folder_path = "E7-images"

# File extensions treated as images; compared case-insensitively so that
# names such as "PHOTO.JPG" are not silently skipped.
image_extensions = (".jpg", ".jpeg", ".png")

# Successfully decoded images, in file-name order
images = []

# os.listdir() returns entries in arbitrary order; sorting keeps the mapping
# between sample index and file stable from run to run.
for filename in sorted(os.listdir(folder_path)):
    if not filename.lower().endswith(image_extensions):
        continue

    # Construct the full path to the image file
    img_path = os.path.join(folder_path, filename)

    # cv2.imread() does not raise on failure: it returns None for files that
    # are missing, unreadable or corrupt, which would crash .flatten() later.
    image = cv2.imread(img_path)
    if image is None:
        print(f"Skipping unreadable image: {img_path}")
        continue

    images.append(image)

if not images:
    raise ValueError(f"No readable images found in folder: {folder_path!r}")

# Every row of the feature matrix must have the same length, so images whose
# height/width differ from the first image are resized to match it. The
# originals in `images` are left untouched; only the model input is resized.
target_height, target_width = images[0].shape[:2]
model_inputs = [
    image
    if image.shape[:2] == (target_height, target_width)
    # cv2.resize() takes the target size as (width, height)
    else cv2.resize(image, (target_width, target_height))
    for image in images
]

# Flatten images into one row per image. float32 is used because a float64
# matrix would need twice the memory.
flattened_images = np.array(
    [image.flatten() for image in model_inputs], dtype=np.float32
)

# Normalize pixel values to [0, 1]
flattened_images = flattened_images / 255.0

# Build a dense autoencoder: the encoder narrows the flattened pixels down to
# a 32-unit bottleneck and the decoder mirrors it back to the input width.
n_pixels = flattened_images.shape[1]

encoder_layers = [
    Dense(128, activation='relu', input_shape=(n_pixels,)),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
]
decoder_layers = [
    Dense(64, activation='relu'),
    Dense(128, activation='relu'),
    Dense(n_pixels, activation='sigmoid'),
]

autoencoder = Sequential(encoder_layers + decoder_layers)

# Train the network to reproduce its own input
autoencoder.compile(loss='mse', optimizer='adam')
autoencoder.fit(
    x=flattened_images,
    y=flattened_images,
    batch_size=32,
    epochs=10,
)

# The encoder reuses the first three (already trained) layer objects, so its
# output is the 32-unit bottleneck activation for each image.
encoder = Sequential(encoder_layers)
autoencoder_features = encoder.predict(flattened_images)

# Number of clusters. Defined once so that the KMeans model and the
# per-cluster grouping below can never disagree.
n_clusters = 5

# Perform KMeans clustering on the encoded features.
# n_init is set explicitly because the library default differs between
# scikit-learn releases; pinning it keeps results comparable across versions.
kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=42)
labels = kmeans.fit_predict(autoencoder_features)

# Evaluate clustering
silhouette_avg_autoencoder = silhouette_score(autoencoder_features, labels)
print("Silhouette Score (Autoencoder):", silhouette_avg_autoencoder)

# Group samples by cluster: entry k holds the original images assigned to
# cluster k (labels are aligned with `images` by position).
cluster_samples_autoencoder = [[] for _ in range(n_clusters)]
for image, label in zip(images, labels):
    cluster_samples_autoencoder[label].append(image)

# Show up to 5 randomly chosen images per cluster, one cluster at a time
for cluster_number, members in enumerate(cluster_samples_autoencoder, start=1):
    print("Cluster", cluster_number)

    # A cluster may hold fewer than 5 images; never ask for more than exist
    n_to_show = min(5, len(members))
    chosen = random.sample(members, n_to_show)

    for sample_number, picture in enumerate(chosen, start=1):
        window_title = f"Cluster {cluster_number} Sample {sample_number}"
        cv2.imshow(window_title, picture)

    # Block until a key is pressed, then close this cluster's windows before
    # moving on to the next cluster
    cv2.waitKey(0)
    cv2.destroyAllWindows()





Epoch 1/10

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Silhouette Score (Autoencoder): 0.37035838
Cluster 1
Cluster 2
Cluster 3
Cluster 4
Cluster 5
