In [None]:
import os
import shutil
import cv2
import numpy as np
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.preprocessing import image
from sklearn.cluster import KMeans
from tqdm import tqdm

In [None]:
# Function to create subdirectories if they don't exist
def create_subdirectories():
    """Ensure the three output folders exist in the current working directory.

    Creates ``boots``, ``shoes`` and ``sandals`` relative to the current
    working directory. Calling it again when the folders already exist is a
    no-op.

    Raises:
        FileExistsError: If one of the names exists but is not a directory
            (for example a stray file called ``boots``). Failing here is
            deliberate: otherwise the problem only shows up later, when
            files are copied into the folder.
    """
    for subdir in ('boots', 'shoes', 'sandals'):
        # exist_ok=True folds the "is it already there?" check into the call
        # itself, so there is no gap between checking and creating.
        os.makedirs(subdir, exist_ok=True)

In [None]:
# Function to extract VGG16 features from an image
def extract_features(image_path, vgg_model):
    """Turn one image file into a flat feature vector using ``vgg_model``.

    The image is loaded at 224x224, given a leading batch axis, passed
    through ``preprocess_input`` and then through ``vgg_model.predict``.

    Args:
        image_path: Path of the image file to load.
        vgg_model: Model object exposing a Keras-style
            ``predict(x, verbose=...)`` method.

    Returns:
        The model output for this single image, flattened to one dimension.
    """
    img = image.load_img(image_path, target_size=(224, 224))
    batch = np.expand_dims(image.img_to_array(img), axis=0)  # batch of one
    batch = preprocess_input(batch)

    # verbose=0: this function runs once per image, and Keras' per-call
    # progress bar would otherwise flood the notebook output and break up
    # any progress bar the caller is drawing.
    features = vgg_model.predict(batch, verbose=0)
    return features.flatten()

In [None]:
# Function to sort images into their respective categories
def sort_images(dataset_path, random_state=0):
    """Cluster the images in ``dataset_path`` and copy them into category folders.

    Every regular file directly inside ``dataset_path`` (sub-directories are
    skipped) is converted to a VGG16 feature vector, the vectors are grouped
    into three K-means clusters, and each file is copied (the source is left
    in place) into ``boots``, ``shoes`` or ``sandals`` under the current
    working directory.

    NOTE: K-means is unsupervised, so nothing ties cluster 0 to actual boots,
    cluster 1 to shoes or cluster 2 to sandals; the folder names are
    positional labels only. Inspect the result before relying on the names.

    Args:
        dataset_path: Directory containing the image files.
        random_state: Seed forwarded to ``KMeans`` to make repeated runs
            reproducible. Pass ``None`` to get the previous unseeded behavior.

    Raises:
        ValueError: If ``dataset_path`` holds fewer files than there are
            clusters.
    """
    # Index i of this tuple is the folder that receives cluster i.
    category_dirs = ('boots', 'shoes', 'sandals')

    # sorted(): os.listdir order is arbitrary, and a stable order is needed
    # for the clustering to be repeatable.
    candidates = (
        os.path.join(dataset_path, name)
        for name in sorted(os.listdir(dataset_path))
    )
    image_paths = [path for path in candidates if os.path.isfile(path)]

    # Check before loading the model: K-means cannot form more clusters than
    # there are samples, and failing here gives a clearer message sooner.
    if len(image_paths) < len(category_dirs):
        raise ValueError(
            f"Need at least {len(category_dirs)} image files in "
            f"{dataset_path!r} to form {len(category_dirs)} clusters, "
            f"found {len(image_paths)}."
        )

    create_subdirectories()

    # Load pre-trained VGG16 model (convolutional base only)
    vgg_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

    # One feature row per image, in the same order as image_paths
    image_features = np.stack([
        extract_features(path, vgg_model)
        for path in tqdm(image_paths, ncols=150)
    ])

    # Cluster images using K-means
    kmeans = KMeans(n_clusters=len(category_dirs), random_state=random_state)
    clusters = kmeans.fit_predict(image_features)

    # Copy each image into the folder assigned to its cluster
    for image_path, cluster_id in zip(image_paths, clusters):
        destination = os.path.join(
            category_dirs[cluster_id], os.path.basename(image_path)
        )
        shutil.copy(image_path, destination)

In [None]:
if __name__ == "__main__":
    # Location of the extracted dataset folder; point this at your own copy
    # when running outside this environment.
    dataset_path = "/kaggle/input/shoes-boots-sandals/shoe_boot_sandal"

    # Cluster the images and copy them into the category subdirectories.
    sort_images(dataset_path=dataset_path)