In [36]:

import torch

import numpy as np
import open_clip
from open_clip import tokenizer

In [37]:
path2text = './general.txt'

# Load the prompts, one per line of the text file.
# - The encoding is given explicitly so decoding does not depend on the
#   platform's default locale (assumes the file is UTF-8 -- TODO confirm).
# - Lines that are empty after stripping are skipped, so a blank or trailing
#   line does not end up as an empty prompt.
with open(path2text, 'r', encoding='utf-8') as f:
    stripped_lines = (line.strip() for line in f)
    prompts_list = [prompt for prompt in stripped_lines if prompt]


In [38]:
# Sanity check: print the first ten loaded prompts.
first_prompts = prompts_list[:10]
print(first_prompts)

['A futuristic cityscape with flying cars, neon lights, and towering skyscrapers.', 'A mystical forest with a majestic unicorn standing in a clearing, surrounded by glowing flowers.', 'A vibrant underwater world with colorful coral reefs, schools of tropical fish, and a mermaid swimming by.', 'An otherworldly landscape with floating islands, strange rock formations, and a massive portal in the sky.', 'A bustling market in a foreign city, filled with exotic spices, fruits, and handcrafted goods.', 'A dramatic space battle between a fleet of starships and a massive alien mothership, with laser beams and explosions lighting up the scene.', 'A serene mountain landscape with a tranquil lake, towering pine trees, and a small cabin nestled in the woods.', 'A romantic scene with a couple dancing in a moonlit garden, surrounded by blooming flowers and twinkling stars.', 'A post-apocalyptic wasteland with ruins of buildings, abandoned vehicles, and survivors scavenging for resources.', 'A glamor

In [39]:
# Build the CLIP ViT-B/32 model with the OpenAI pretrained weights.
# The second return value is discarded; `preprocess` is kept as returned.
model, _, preprocess = open_clip.create_model_and_transforms('ViT-B-32', pretrained='openai')
# open_clip returns the model in train mode; switch to eval mode because the
# model is only used for inference here.
model.eval()

In [40]:
# Tokenize all prompts in one batch.
text_tokens = tokenizer.tokenize(prompts_list)

# Encode the tokens with the model's text encoder; gradient tracking is
# disabled because the embeddings are only used for analysis.
with torch.no_grad():
    encoded_text = model.encode_text(text_tokens)
# Cast the embeddings to float32.
text_features = encoded_text.float()

In [69]:

# Compute the pairwise cosine similarity between all prompt embeddings.
# The tensor is converted to a NumPy array explicitly: relying on NumPy's
# implicit conversion fails for tensors on a GPU or tensors tracking gradients.
features = text_features.detach().cpu().numpy()
norms = np.linalg.norm(features, axis=1)  # computed once, reused for rows and columns
cos_sim = np.dot(features, features.T) / (norms[:, None] * norms[None, :])
# Pin the diagonal (each prompt's similarity to itself) to exactly 1.0 so
# that, in the ranking below, the first entry of each row is expected to be
# the prompt itself.
np.fill_diagonal(cos_sim, 1.0)

# Find the closest text prompts using cosine similarity.
# top_k counts the prompt itself, so each row yields top_k - 1 real neighbours.
top_k = 2
# argsort is ascending: keep the last top_k columns and reverse them so the
# most similar entry comes first.
closest_ids = cos_sim.argsort(axis=1)[:, -top_k:][:, ::-1]

for prompt, closest_id in zip(prompts_list, closest_ids):
    print(prompt)
    for idx in closest_id[1:]:  # exclude the first one, which is the prompt itself
        print(prompts_list[idx])
    break  # only preview the neighbours of the first prompt

A futuristic cityscape with flying cars, neon lights, and towering skyscrapers.
A futuristic cityscape scene with sleek skyscrapers, flying cars, and neon lights.


In [83]:
from sklearn.cluster import KMeans

k = 10

# Cluster the prompts with k-means, using each prompt's row of the cosine
# similarity matrix as its feature vector.
# n_init is pinned explicitly: its default changed from 10 to 'auto' in
# scikit-learn 1.4, which would otherwise change the clustering (and the
# selected prompts) depending on the installed version.
kmeans = KMeans(n_clusters=k, init='k-means++', n_init=10, random_state=33)
kmeans.fit(cos_sim)

centroids = kmeans.cluster_centers_

# Pick one prompt index per cluster.
k_points = []
for i in range(k):
    # Indices of the prompts assigned to cluster i.
    cluster_indices = np.where(kmeans.labels_ == i)[0]
    # Sum each member's similarity to all members of its own cluster and take
    # the member with the SMALLEST total.
    # NOTE(review): argmin over summed similarity selects the member least
    # similar to its cluster-mates, not the most central one; if the medoid was
    # intended (as the name `closest_point` suggests), this should be argmax --
    # confirm the intent before changing.
    closest_point = cluster_indices[np.argmin(cos_sim[cluster_indices][:, cluster_indices].sum(axis=0))]
    # Store a plain Python int so the printed list stays readable under
    # NumPy 2, where NumPy scalars print as `np.int64(...)`.
    k_points.append(int(closest_point))

print(k_points)

[110, 136, 130, 55, 52, 65, 112, 42, 75, 15]




In [85]:
# Print the prompt text for every index collected in k_points
# (one selected prompt per cluster).
for prompt_idx in k_points:
    selected_prompt = prompts_list[prompt_idx]
    print(selected_prompt)

A mystical and enchanting scene of a mermaid underwater, surrounded by colorful fish and coral reefs.
A mysterious and intriguing scene of an abandoned library with dusty books, ancient scrolls, and hidden secrets.
A romantic and whimsical scene of a couple dancing under the stars on a fairy tale-inspired balcony.
A whimsical and colorful underwater coral reef scene with vibrant fish, seahorses, and other marine creatures.
A dark and moody forest scene with gnarled trees, misty fog, and eerie shadows lurking between the trunks.
A whimsical and colorful candy land scene with candy canes, lollipops, and gumdrops as far as the eye can see.
A bustling and vibrant scene of a street market in Tokyo, Japan, with neon lights and street food vendors.
A tranquil space scene with a spaceship orbiting a planet, shooting stars, and a constellation of stars in the distance.
A spooky and eerie abandoned hospital scene with flickering lights, medical equipment, and ghostly apparitions.
A romantic scen