<a href="https://colab.research.google.com/github/tirtthshah/text-to-image-pipeline/blob/main/Task_4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Notebook shell escape: install the `datasets` package that the following
# cells import `load_dataset` from.
!pip install datasets

In [None]:
from datasets import load_dataset

# Load the "train" split of jxie/flickr8k and print its first record so the
# available fields can be inspected.
dataset = load_dataset(path="jxie/flickr8k", split="train")
first_record = dataset[0]
print(first_record)

In [None]:
from datasets import load_dataset

# Load the "train" split again (keeps this cell runnable on its own) and
# collect the "image" field of its first 200 rows.
dataset = load_dataset(path="jxie/flickr8k", split="train")
images = list(map(lambda sample: sample["image"], dataset.select(range(200))))

In [None]:
import numpy as np
from PIL import Image
import cv2

def extract_hsv_histogram(img, bins=(8, 8, 8)):
    """Return a normalised, flattened 3-D histogram of ``img`` in HSV mode.

    Args:
        img: Object whose ``convert("HSV")`` result can be turned into an
            array by ``np.array`` and reshaped into rows of three channel
            values (the callers in this notebook pass dataset images).
        bins: Bin specification forwarded to ``np.histogramdd``; by default
            8 bins per channel.

    Returns:
        1-D array holding the histogram counts divided by their total.
    """
    # One row per pixel, one column per channel.
    pixels = np.array(img.convert("HSV")).reshape(-1, 3)
    # Every channel is binned over the same [0, 256) interval.
    channel_range = [(0, 256) for _ in range(3)]
    counts, _edges = np.histogramdd(pixels, bins=bins, range=channel_range)
    return counts.ravel() / counts.sum()

# Colour descriptors: one HSV histogram per image, computed on a 64x64 copy.
thumbnails = (img.resize((64, 64)) for img in images)
color_features = np.array([extract_hsv_histogram(thumb) for thumb in thumbnails])


def extract_edge_features(img):
    """Return a flattened Canny edge map of a 64x64 grayscale copy of ``img``.

    Args:
        img: Image exposing ``convert`` and ``resize`` (the callers in this
            notebook pass dataset images).

    Returns:
        1-D array with one value per pixel of the 64x64 edge map, each
        divided by 255.0.
    """
    thumbnail = img.convert("L").resize((64, 64))
    # 100 / 200 are the lower / upper hysteresis thresholds given to Canny.
    edge_map = cv2.Canny(np.array(thumbnail), 100, 200)
    return edge_map.ravel() / 255.0

# Shape descriptors: one flattened edge map per image.
shape_features = np.array(list(map(extract_edge_features, images)))

In [None]:
from sklearn.manifold import Isomap

# Project each feature matrix to 2-D with its own Isomap model
# (10-neighbour graph); shape is fitted first, then colour.
shape_iso, color_iso = (
    Isomap(n_neighbors=10, n_components=2).fit_transform(features)
    for features in (shape_features, color_features)
)

In [None]:
import matplotlib.pyplot as plt
from matplotlib.offsetbox import OffsetImage, AnnotationBbox

def plot_embedding(embedding, images, title):
    """Show every image as a 32x32 thumbnail at its 2-D embedding position.

    Args:
        embedding: Sequence of (x, y) points, one per image, in the same
            order as ``images``.
        images: Images exposing ``resize``; paired with ``embedding`` by
            position.
        title: Title drawn above the axes.
    """
    fig, ax = plt.subplots(figsize=(10, 10))
    ax.set_title(title)
    for xy, img in zip(embedding, images):
        im = OffsetImage(img.resize((32, 32)), zoom=1)
        ab = AnnotationBbox(im, xy, frameon=True, bboxprops=dict(edgecolor='black', linewidth=1))
        ax.add_artist(ab)
    # add_artist() does not extend the data limits, so without this the axes
    # keep their default 0..1 view and every thumbnail anchored outside that
    # window is not drawn (AnnotationBbox clips on its data-coordinate xy).
    ax.update_datalim(embedding)
    ax.autoscale_view()
    ax.axis("off")
    plt.show()

In [None]:
import matplotlib.pyplot as plt
from matplotlib.offsetbox import OffsetImage, AnnotationBbox
from sklearn.neighbors import NearestNeighbors

def plot_embedding(embedding, images, title):
    """Plot image thumbnails at their 2-D embedding positions with kNN edges.

    Each point is joined to its nearest neighbours in the embedding by a
    faint green line, then the thumbnails are drawn on top.

    Args:
        embedding: Array of shape (n_points, 2), indexed as
            ``embedding[i, 0]`` / ``embedding[i, 1]``; one row per image.
        images: Images exposing ``resize``; paired with ``embedding`` rows
            by position.
        title: Title drawn above the axes.
    """
    fig, ax = plt.subplots(figsize=(12, 12))
    ax.set_title(title, fontsize=20)

    if len(embedding):
        # Clamp k so embeddings with fewer than 5 points do not make
        # NearestNeighbors raise.
        k = min(5, len(embedding))
        nbrs = NearestNeighbors(n_neighbors=k).fit(embedding)
        graph = nbrs.kneighbors_graph(embedding)
        # Walk only the stored edges of the sparse graph instead of
        # densifying it and scanning all n^2 cells.
        for i, j in zip(*graph.nonzero()):
            if i == j:
                # Querying with the training points makes every point its
                # own nearest neighbour; that zero-length edge is skipped.
                continue
            ax.plot([embedding[i, 0], embedding[j, 0]],
                    [embedding[i, 1], embedding[j, 1]],
                    color='green', linewidth=1, alpha=0.3)

    for xy, img in zip(embedding, images):
        im = OffsetImage(img.resize((32, 32)), zoom=0.6)
        im.set_alpha(0.9)
        ab = AnnotationBbox(im, xy, frameon=True,
                            bboxprops=dict(edgecolor='black', linewidth=0.8))
        ax.add_artist(ab)

    ax.set_xticks([])
    ax.set_yticks([])
    plt.grid(False)
    plt.tight_layout()
    plt.show()

In [None]:
def plot_embedding(embedding, images, title, captions=None):
    """Plot captioned image thumbnails at their embedding positions with kNN edges.

    Each point is joined to its nearest neighbours in the embedding by a
    faint green line; thumbnails are drawn on top with a short caption
    underneath.

    Args:
        embedding: Array of shape (n_points, 2), indexed as
            ``embedding[i, 0]`` / ``embedding[i, 1]``; one row per image.
        images: Images exposing ``resize``; paired with ``embedding`` rows
            by position.
        title: Title drawn above the axes.
        captions: Optional sequence of caption strings, one per image. When
            omitted, the ``caption_0`` field of the first ``len(images)``
            rows of the module-level ``dataset`` is used, which assumes
            ``images`` came from those rows in order.
    """
    from sklearn.neighbors import NearestNeighbors

    fig, ax = plt.subplots(figsize=(12, 12))
    ax.set_title(title, fontsize=20)

    if len(embedding):
        # Clamp k so embeddings with fewer than 5 points do not make
        # NearestNeighbors raise.
        k = min(5, len(embedding))
        nbrs = NearestNeighbors(n_neighbors=k).fit(embedding)
        graph = nbrs.kneighbors_graph(embedding)
        # Walk only the stored edges of the sparse graph instead of
        # densifying it and scanning all n^2 cells.
        for i, j in zip(*graph.nonzero()):
            if i == j:
                # Querying with the training points makes every point its
                # own nearest neighbour; that zero-length edge is skipped.
                continue
            ax.plot([embedding[i, 0], embedding[j, 0]],
                    [embedding[i, 1], embedding[j, 1]],
                    color='green', linewidth=1, alpha=0.3)

    if captions is None:
        # Ask for the caption column only, rather than iterating whole rows
        # (which would also load each row's image a second time).
        captions = dataset.select(range(len(images)))["caption_0"]

    for xy, img, caption in zip(embedding, images, captions):
        # Only mark captions that were actually cut off.
        label = caption if len(caption) <= 30 else caption[:30] + "..."
        im = OffsetImage(img.resize((32, 32)), zoom=0.6)
        im.set_alpha(0.9)
        ab = AnnotationBbox(im, xy, frameon=True,
                            bboxprops=dict(edgecolor='black', linewidth=0.8))
        ax.add_artist(ab)
        # Offset the label in points, not data units, so it sits just below
        # the thumbnail whatever the numeric scale of the embedding is.
        ax.annotate(label, xy, xytext=(0, -14), textcoords="offset points",
                    fontsize=6, ha='center', va='top')

    ax.set_xticks([])
    ax.set_yticks([])
    plt.grid(False)
    plt.tight_layout()
    plt.show()

In [None]:
import matplotlib.pyplot as plt
import random

# Show a 3x3 grid of random dataset images, each titled with its first caption.
fig, axes = plt.subplots(3, 3, figsize=(10, 10))

# Draw the random sample once, before the loop. Resampling inside the loop
# made the first tile always row 0, took every other tile from a different
# random draw (so images could repeat), and re-ran select() on every pass.
indices = random.sample(range(len(dataset)), min(axes.size, len(dataset)))
samples = dataset.select(indices)

# Hide all axes first so tiles stay blank if fewer samples than tiles exist.
for ax in axes.flat:
    ax.axis("off")

for ax, sample in zip(axes.flat, samples):
    img = sample["image"]
    caption = sample["caption_0"]
    ax.imshow(img)
    # Only mark captions that were actually cut off.
    ax.set_title(caption if len(caption) <= 40 else caption[:40] + "...", fontsize=8)

plt.tight_layout()
plt.show()

In [None]:
# Render both embeddings with the same thumbnails: shape first, then colour.
for embedding, title in ((shape_iso, "Shape Isomap"), (color_iso, "Color Isomap")):
    plot_embedding(embedding, images, title)