In [1]:
from google.colab import drive
drive.mount('/K_means/')

Mounted at /K_means/


In [2]:

import os
import shutil
import numpy as np
from sklearn.cluster import KMeans
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.applications.inception_v3 import preprocess_input
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# Step 1: Extract Features from Images
def extract_features(image_path, model):
    """Extract features from an image using a pre-trained model."""
    image = load_img(image_path, target_size=(299, 299))
    image = img_to_array(image)
    image = np.expand_dims(image, axis=0)
    image = preprocess_input(image)
    features = model.predict(image)
    return features.flatten()

def extract_features_from_folder(folder_path, model):
    """Extract features for all images in a folder."""
    features = []
    image_paths = []
    for file_name in os.listdir(folder_path):
        file_path = os.path.join(folder_path, file_name)
        if file_name.lower().endswith(('jpg', 'jpeg', 'png')):
            features.append(extract_features(file_path, model))
            image_paths.append(file_path)
    return np.array(features), image_paths

In [3]:
# Load InceptionV3 pre-trained model
base_model = InceptionV3(weights='imagenet', include_top=False, pooling='avg')

# Path to your dataset folder
input_folder = "/K_means/MyDrive/GroundNut/VT_DATASET/GROUND_NUT_UNIQUE/aphids"

# Extract features and get image paths
features, image_paths = extract_features_from_folder(input_folder, base_model)
print(f"Extracted features for {len(features)} images.")

from sklearn.decomposition import PCA
# Step 1: Apply PCA to reduce dimensions
def apply_pca(features, n_components=50):
    """Reduce dimensions of features using PCA."""
    pca = PCA(n_components=n_components, random_state=42)
    reduced_features = pca.fit_transform(features)
    print(f"PCA: Reduced features from {features.shape[1]} to {n_components} dimensions.")
    return reduced_features

# Reduce dimensions of the extracted features
n_components = 50  # You can adjust this based on your dataset
reduced_features = apply_pca(features, n_components=n_components)

# Step 2: Apply K-Means Clustering
def apply_kmeans(features, n_clusters):
    """Cluster features using K-Means."""
    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    labels = kmeans.fit_predict(features)
    print(f"KMeans clustering completed with {n_clusters} clusters.")
    return labels

# Define the number of clusters (you can adjust this based on your dataset)
num_clusters = 10
cluster_labels = apply_kmeans(reduced_features, num_clusters)





Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m87910968/87910968[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 250ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 246ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 248ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 250ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 251ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 258ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 257ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 266ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━

In [4]:
# Step 3: Organize Images into Folders Based on Clusters
def save_images_to_clusters(image_paths, cluster_labels, output_folder):
    """Save images into cluster-specific folders."""
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    for i, image_path in enumerate(image_paths):
        cluster_folder = os.path.join(output_folder, f"cluster_{cluster_labels[i]}")
        if not os.path.exists(cluster_folder):
            os.makedirs(cluster_folder)

        # Copy the image to the cluster folder
        shutil.copy(image_path, cluster_folder)

    print(f"Images have been organized into {len(set(cluster_labels))} cluster folders.")

# Path to save clustered images
output_folder = "/K_means/MyDrive/Find_similarity/k_cluster_folder"

# Save images into cluster folders
save_images_to_clusters(image_paths, cluster_labels, output_folder)

Images have been organized into 10 cluster folders.


In [5]:

from sklearn.metrics.pairwise import cosine_similarity
# Step 3: Organize Images into Folders Based on Clusters and Sort by Similarity
def save_images_to_clusters_sorted(image_paths, features, cluster_labels, output_folder):
    """Save images into cluster-specific folders, sorted by similarity."""
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    for cluster_id in set(cluster_labels):
        # Get all images in the current cluster
        cluster_indices = np.where(cluster_labels == cluster_id)[0]
        cluster_features = features[cluster_indices]
        cluster_image_paths = [image_paths[i] for i in cluster_indices]

        # Compute similarity matrix for the cluster
        similarity_matrix = cosine_similarity(cluster_features)
        reference_index = 0  # Choose the first image as the reference

        # Sort images by their similarity to the reference image
        similarity_scores = similarity_matrix[reference_index]
        sorted_indices = np.argsort(-similarity_scores)  # Sort in descending order

        # Create the cluster folder
        cluster_folder = os.path.join(output_folder, f"cluster_{cluster_id}")
        if not os.path.exists(cluster_folder):
            os.makedirs(cluster_folder)

        # Save images into the cluster folder with sorted file names
        for new_index, original_index in enumerate(sorted_indices):
            original_image_path = cluster_image_paths[original_index]
            new_file_name = f"{new_index + 1:03d}.jpg"  # Rename as 001.jpg, 002.jpg, etc.
            new_image_path = os.path.join(cluster_folder, new_file_name)

            # Copy the image to the new location
            shutil.copy(original_image_path, new_image_path)

        print(f"Cluster {cluster_id} sorted and saved with {len(cluster_indices)} images.")

# Path to save clustered images
output_folder = "/K_means/MyDrive/Find_similarity/ascending_cluster"

# Save images into cluster folders, sorted by similarity
save_images_to_clusters_sorted(image_paths, features, cluster_labels, output_folder)


Cluster 0 sorted and saved with 71 images.
Cluster 1 sorted and saved with 105 images.
Cluster 2 sorted and saved with 101 images.
Cluster 3 sorted and saved with 47 images.
Cluster 4 sorted and saved with 67 images.
Cluster 5 sorted and saved with 91 images.
Cluster 6 sorted and saved with 53 images.
Cluster 7 sorted and saved with 61 images.
Cluster 8 sorted and saved with 92 images.
Cluster 9 sorted and saved with 50 images.
