In [1]:
import os
from PIL import Image
import torch
from torchvision import transforms
import numpy as np

# Directory that holds the input images; later cells read every supported
# image file directly inside this folder.
image_dir = '/content/images'  # Update this with your actual image folder path

# Preprocessing pipeline applied to every image before it is fed to the model.
# The steps run in the order listed.
transform = transforms.Compose([
    # Resize so the image is 256 px on its shorter side (int argument).
    transforms.Resize(256),
    # Keep the central 224x224 region.
    transforms.CenterCrop(224),
    # Convert the PIL image to a float tensor.
    transforms.ToTensor(),
    # Per-channel normalisation; these mean/std values are the ones commonly
    # used with ImageNet-pretrained torchvision models.
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Load images from directory
def load_images_from_folder(folder, extensions=('.jpg', '.jpeg', '.png')):
    """Load and preprocess every supported image directly inside ``folder``.

    Files are visited in sorted filename order so repeated runs yield the same
    ordering (``os.listdir`` makes no ordering guarantee). The extension check
    is case-insensitive, so ``photo.JPG`` is picked up as well as ``photo.jpg``.
    Each image is opened with PIL, converted to RGB and passed through the
    module-level ``transform``.

    Args:
        folder: Path of the directory to scan (not recursive).
        extensions: Iterable of filename suffixes to accept; compared
            case-insensitively.

    Returns:
        A tuple ``(images, filenames)`` of two equally long lists: the
        transformed images and the corresponding filenames. Files that fail
        to load are reported on stdout and skipped.

    Raises:
        OSError: If ``folder`` cannot be listed (e.g. it does not exist).
    """
    accepted = tuple(ext.lower() for ext in extensions)
    images = []
    filenames = []
    for filename in sorted(os.listdir(folder)):
        if not filename.lower().endswith(accepted):
            continue
        image_path = os.path.join(folder, filename)
        try:
            # The context manager closes the underlying file handle even when
            # decoding or the transform fails.
            with Image.open(image_path) as image:
                tensor = transform(image.convert('RGB'))
        except Exception as e:
            # Best effort: one unreadable file must not abort the whole load.
            print(f"Error loading image {filename}: {e}")
            continue
        # Append both lists together so they always stay aligned.
        images.append(tensor)
        filenames.append(filename)
    return images, filenames

# Load and preprocess all images up front. Fail fast with a clear message when
# nothing could be loaded: the later feature-extraction and dimensionality-
# reduction cells cannot work with an empty list and would otherwise fail with
# a much less helpful error.
images, image_filenames = load_images_from_folder(image_dir)
if not images:
    raise ValueError(f"No loadable images found in {image_dir!r}")


In [2]:
import torchvision.models as models

# Load a ResNet-50 with ImageNet weights through the `weights=` API; the older
# `pretrained=True` flag is deprecated in current torchvision releases.
# IMAGENET1K_V1 is the checkpoint that `pretrained=True` resolves to
# (resnet50-0676ba61.pth), so the resulting model is unchanged.
model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
# Switch to evaluation mode so layers such as batch norm behave
# deterministically at inference time.
model.eval()

# Function to extract features from a single image
def extract_features(image_tensor):
    """Run one preprocessed image through the module-level ``model``.

    Args:
        image_tensor: A single image tensor without a batch dimension.

    Returns:
        The model output as a NumPy array, with all size-1 dimensions
        (including the temporary batch dimension) removed.
    """
    # The model expects a batch, so wrap the single image in a batch of one.
    batch = image_tensor.unsqueeze(0)
    # Gradients are not needed for inference; skip tracking them.
    with torch.no_grad():
        output = model(batch)
    flattened = output.squeeze()
    return flattened.numpy()

# Run every preprocessed image through the model, collecting one output
# vector per image in the same order as `images`.
features = list(map(extract_features, images))


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 139MB/s]


In [6]:
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

# Stack the per-image vectors into one (n_samples, n_features) matrix and
# validate it before fitting anything, so a missing/too-small dataset produces
# a clear message instead of an unpacking or estimator error.
feature_matrix = np.asarray(features)
if feature_matrix.ndim != 2 or feature_matrix.shape[0] < 2:
    raise ValueError(
        "Need feature vectors for at least 2 images to compute a 2D embedding; "
        f"got an array of shape {feature_matrix.shape}"
    )

# PCA cannot keep more components than there are samples or features.
n_samples, n_features = feature_matrix.shape
n_components = min(20, n_samples, n_features)  # Adjust this value if needed

# Apply PCA (seeded so the randomized solver, when selected, is reproducible)
pca = PCA(n_components=n_components, random_state=0)
pca_features = pca.fit_transform(feature_matrix)

# Apply t-SNE on the PCA-reduced features.
# t-SNE requires perplexity < n_samples, so cap it for very small datasets;
# with 5 or more images this is the original value of 4.
perplexity = min(4, n_samples - 1)
tsne = TSNE(n_components=2, perplexity=perplexity, random_state=0)
reduced_embeddings = tsne.fit_transform(pca_features)



In [8]:
# Use %pip (not !pip) so the package is installed into the running kernel's
# environment, and pin the version this notebook was executed with.
%pip install dash==2.18.2

Collecting dash
  Downloading dash-2.18.2-py3-none-any.whl.metadata (10 kB)
Collecting dash-html-components==2.0.0 (from dash)
  Downloading dash_html_components-2.0.0-py3-none-any.whl.metadata (3.8 kB)
Collecting dash-core-components==2.0.0 (from dash)
  Downloading dash_core_components-2.0.0-py3-none-any.whl.metadata (2.9 kB)
Collecting dash-table==5.0.0 (from dash)
  Downloading dash_table-5.0.0-py3-none-any.whl.metadata (2.4 kB)
Collecting retrying (from dash)
  Downloading retrying-1.3.4-py3-none-any.whl.metadata (6.9 kB)
Downloading dash-2.18.2-py3-none-any.whl (7.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m35.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dash_core_components-2.0.0-py3-none-any.whl (3.8 kB)
Downloading dash_html_components-2.0.0-py3-none-any.whl (4.1 kB)
Downloading dash_table-5.0.0-py3-none-any.whl (3.9 kB)
Downloading retrying-1.3.4-py3-none-any.whl (11 kB)
Installing collected packages: dash-table, dash-html-comp

In [9]:
from dash import Dash, html, dcc
import plotly.express as px

# Dash application that serves the interactive figure.
app = Dash(__name__)

# Plot the 2D embeddings: one point per image, positioned by its t-SNE
# coordinates.
fig = px.scatter(
    x=reduced_embeddings[:, 0],
    y=reduced_embeddings[:, 1],
    text=image_filenames,  # Use filenames as labels for each point
    title="2D Visualization of Image Embeddings"
)

# Dash layout: a heading followed by the scatter plot.
app.layout = html.Div([
    html.H1("Image Data Embeddings Visualization"),
    dcc.Graph(id='scatter-plot', figure=fig)
])

# Run the Dash app.
# `app.run` is the supported entry point; `app.run_server` is the legacy name
# and is no longer available in Dash 3.x, so an unpinned install would fail here.
if __name__ == '__main__':
    app.run(debug=True)


<IPython.core.display.Javascript object>