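# Clustering with Images
Using ImageBind LLM embeddings. Note: the cells below extract image embeddings with a pre-trained MobileNetV2 model and group them with KMeans.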

### Install Necessary Libraries

In [16]:
!pip install tensorflow
!pip install scikit-learn
!pip install matplotlib
!pip install pillow




### Import Libraries:

In [17]:
import tensorflow as tf
from sklearn.cluster import KMeans
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import os

### Load Local Images Function:



In [23]:
def load_local_image(file_path, target_size=(224, 224)):
    """Load an image file from disk as an RGB NumPy array of a fixed size.

    Args:
        file_path: Path of the image file to read.
        target_size: ``(width, height)`` passed to ``Image.resize``. Defaults
            to ``(224, 224)``, the input size the model in this notebook is
            built with.

    Returns:
        The resized RGB image as a NumPy array, or ``None`` when the path does
        not exist or the file cannot be read as an image. Failures are
        reported with ``print`` rather than raised.
    """
    if not os.path.exists(file_path):
        print(f"File not found: {file_path}")
        return None
    try:
        # The context manager guarantees the underlying file handle is closed,
        # including when decoding fails part-way through.
        with Image.open(file_path) as img:
            rgb = img.convert('RGB')  # Normalise palette/greyscale/alpha inputs to 3 channels
            resized = rgb.resize(target_size)
            return np.array(resized)
    except OSError as e:  # IOError is an alias of OSError in Python 3
        print(f"Error loading image from {file_path}: {e}")
        return None

local_image_paths = ["/content/Image1.png", "/content/Image2.png"]

# Try every path in order and keep only the loads that succeeded
# (load_local_image returns None for a missing or unreadable file).
images = [
    loaded
    for loaded in map(load_local_image, local_image_paths)
    if loaded is not None
]

# Report the array shape of each image that was loaded
for i, img in enumerate(images):
    print(f"Image {i} shape: {img.shape}")


Image 0 shape: (224, 224, 3)
Image 1 shape: (224, 224, 3)


### Feature Extraction with a Pre-trained Model:

In [24]:
# Feature extractor used by get_embeddings: MobileNetV2 built with
# include_top=False and pooling='avg' for 224x224 RGB input.
# No `weights` argument is passed, so the Keras default applies.
model = tf.keras.applications.MobileNetV2(
    input_shape=(224, 224, 3),
    include_top=False,
    pooling='avg',
)

def get_embeddings(images):
    """Compute model features for a collection of images.

    The images are combined into a single NumPy array, its shape is printed,
    the MobileNetV2 ``preprocess_input`` transform is applied, and the result
    is passed to the module-level ``model``.

    Args:
        images: Sequence of image arrays. Presumably all the same shape so
            they combine into one 4D batch -- verify against the caller.

    Returns:
        Whatever ``model.predict`` returns for the preprocessed batch.
    """
    # np.array always copies, so the caller's data is never modified in place
    batch = np.array(images)
    print("Shape of images_batch:", batch.shape)

    preprocess = tf.keras.applications.mobilenet_v2.preprocess_input
    return model.predict(preprocess(batch))




In [25]:
# Embed the successfully loaded images; `embeddings` is the input to the KMeans step.
embeddings = get_embeddings(images)

Shape of images_batch: (2, 224, 224, 3)


### Clustering with KMeans

In [27]:
# Never request more clusters than there are samples: KMeans raises a
# ValueError when n_clusters > n_samples (e.g. if an image failed to load).
n_clusters = min(2, len(embeddings))

# n_init is set explicitly because scikit-learn's default differs between
# versions; pinning it keeps the run reproducible and avoids the FutureWarning.
kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=0).fit(embeddings)

# Cluster index assigned to each embedding, in the same order as `embeddings`
labels = kmeans.labels_




### Visualization of Clusters

In [30]:
def display_cluster(images, labels, cluster_number, max_images=5):
    """Show the first few images assigned to one cluster, side by side.

    Args:
        images: Sequence of images accepted by ``Axes.imshow``.
        labels: Cluster label for each image, aligned with ``images``.
        cluster_number: The label whose images should be displayed.
        max_images: Upper bound on how many images are drawn (default 5).

    Returns:
        None. The figure is rendered with ``plt.show()``. When no image
        carries ``cluster_number`` a message is printed and nothing is drawn.
    """
    # Keep only the images of the requested cluster, capped at max_images
    cluster_images = [img for img, label in zip(images, labels) if label == cluster_number]
    shown = cluster_images[:max(max_images, 0)]

    # plt.subplots rejects zero columns, so an empty cluster is reported
    # instead of raising a ValueError.
    if not shown:
        print(f"No images to display for cluster {cluster_number}")
        return

    # squeeze=False always yields a 2D array of Axes, so a single image needs
    # no special-casing.
    fig, axes = plt.subplots(1, len(shown), figsize=(len(shown) * 4, 4), squeeze=False)

    for ax, img in zip(axes.ravel(), shown):
        ax.imshow(img)
        ax.set_axis_off()

    plt.tight_layout()
    plt.show()
