In [22]:
import pickle
from gensim.models import KeyedVectors
from transformers import CLIPProcessor, CLIPModel, AutoTokenizer
import torch
import numpy as np

In [2]:
class WordEmbedder(torch.nn.Module):
    """Embed raw text with the text side of a pretrained CLIP model.

    Holds the ``openai/clip-vit-base-patch32`` checkpoint together with its
    matching tokenizer. Only the tokenizer is kept because ``forward`` never
    touches images.
    """

    def __init__(self):
        super().__init__()
        self.model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
        self.tokenizer = AutoTokenizer.from_pretrained("openai/clip-vit-base-patch32")

    def forward(self, input_texts):
        """Tokenize ``input_texts`` and return their CLIP text features.

        Args:
            input_texts: A string or list of strings accepted by the tokenizer.

        Returns:
            Whatever ``self.model.get_text_features`` returns for the batch
            (expected to be one embedding row per input text).
        """
        processed_inputs = self.tokenizer(
            input_texts, padding=True, truncation=True, return_tensors="pt"
        )
        # The tokenizer always produces CPU tensors; move them next to the
        # model weights so the module keeps working after `.to(device)`.
        first_param = next(self.model.parameters(), None)
        if first_param is not None:
            processed_inputs = {
                name: tensor.to(first_param.device)
                for name, tensor in processed_inputs.items()
            }
        return self.model.get_text_features(**processed_inputs)

In [3]:
# Build the embedder once; its constructor loads the pretrained CLIP model and tokenizer.
embed = WordEmbedder()

In [4]:
# Modifier words that are combined with the class prompts further down.
adjectives = [
    'dark',
    'bright',
    'red',
    'blue',
]

In [5]:
# One text prompt per scene category; every prompt shares the same
# "aero imagery of " lead-in, so only the category label varies.
class_names = [
    f'aero imagery of {scene}'
    for scene in (
        'Airport', 'BareLand', 'BaseballField', 'Beach', 'Bridge',
        'Center', 'Church', 'Commercial', 'DenseResidential', 'Desert',
        'Farmland', 'Forest', 'Industrial', 'Meadow', 'MediumResidential',
        'Mountain', 'Park', 'Parking', 'Playground', 'Pond',
        'Port', 'RailwayStation', 'Resort', 'River', 'School',
        'SparseResidential', 'Square', 'Stadium', 'StorageTanks', 'Viaduct',
    )
]
 

In [6]:
# Every adjective crossed with every class prompt, grouped by adjective:
# all prompts for the first adjective come first, then the second, and so on.
modified_class_names = [
    f'{modifier} {prompt}'
    for modifier in adjectives
    for prompt in class_names
]

In [24]:
# Embed every prompt once and index the resulting vectors by prompt text.
# The key list is built a single time so keys and embedding rows cannot drift apart.
vocabulary = class_names + modified_class_names + adjectives
with torch.no_grad():  # inference only; no autograd graph is needed
    embeddings = embed(vocabulary)
# Take the dimensionality from the model output rather than hard-coding 512,
# so a different CLIP checkpoint does not silently mismatch.
kv = KeyedVectors(vector_size=embeddings.shape[1])
# `.cpu()` is a no-op for CPU tensors and makes the conversion safe otherwise.
kv.add_vectors(vocabulary, embeddings.cpu().numpy())

In [28]:
# kv.most_similar(
#     positive=['bright aero imagery of Airport', 'aero imagery of Forest'],
#     negative=['aero imagery of Airport' ]
# )
# Nearest keys to ("bright Airport" - "Airport").
# `positive` / `negative` are passed as lists, the documented form of the API;
# bare strings rely on implicit coercion that not every gensim release applies
# to both arguments.
kv.most_similar(
    positive=['bright aero imagery of Airport'],
    negative=['aero imagery of Airport'],
)

[('bright', 0.2118937075138092),
 ('bright aero imagery of Beach', 0.19221678376197815),
 ('bright aero imagery of School', 0.18275639414787292),
 ('bright aero imagery of RailwayStation', 0.1824687421321869),
 ('bright aero imagery of Industrial', 0.17214228212833405),
 ('bright aero imagery of Resort', 0.17184950411319733),
 ('bright aero imagery of Forest', 0.17086166143417358),
 ('bright aero imagery of Playground', 0.17047804594039917),
 ('bright aero imagery of Desert', 0.16464237868785858),
 ('bright aero imagery of Church', 0.15913262963294983)]

In [26]:
# Raw offset between the "bright" Airport prompt and the plain Airport prompt.
bright_airport = kv['bright aero imagery of Airport']
plain_airport = kv['aero imagery of Airport']
bright_airport - plain_airport

array([ 1.70031004e-02, -1.71628505e-01,  7.59258866e-03,  1.14711225e-02,
       -2.66577303e-02, -8.69839340e-02, -1.22551166e-01, -3.72449875e-01,
       -5.44427186e-02, -6.51580468e-02, -1.36438012e-02,  7.69337192e-02,
       -1.92082673e-01, -1.48425505e-01,  6.59660250e-02,  1.78295061e-01,
       -1.31608263e-01, -3.99440974e-02, -4.21633124e-02,  3.49799544e-03,
       -9.83523428e-02,  9.39022601e-02,  9.06825066e-04, -1.03176340e-01,
        1.29979849e-01,  7.18982220e-02,  6.12522513e-02,  1.65171623e-02,
        7.03807473e-02,  1.89028680e-02,  1.97555512e-01, -1.06432229e-01,
       -1.44201070e-02,  1.79693550e-01,  7.59713054e-02, -9.25313830e-02,
       -3.53454947e-02,  6.51446208e-02,  2.03149334e-01,  2.64013380e-01,
        2.91950524e-01, -5.00544384e-02, -2.41017044e-02, -2.52985179e-01,
       -1.14364922e-01,  1.13233924e-04, -9.80196148e-02,  1.06438994e-01,
       -1.99030295e-01, -2.07864583e-01,  7.63451904e-02,  2.72645354e-02,
       -4.42154706e-02, -

In [8]:
# Two-level lookup: adjective -> class prompt -> embedding of "<adjective> <prompt>".
embedding_by_adj = {
    adj: {label: kv[f'{adj} {label}'] for label in class_names}
    for adj in adjectives
}
# The unmodified prompts are stored last, under the 'default' key.
embedding_by_adj['default'] = {label: kv[label] for label in class_names}

In [9]:
# For each adjective and class prompt: modified embedding minus the
# embedding of the unmodified ('default') prompt.
differences = {
    adj: {
        label: (embedding_by_adj[adj][label] - embedding_by_adj['default'][label])
        for label in class_names
    }
    for adj in adjectives
}


In [10]:
from matplotlib import pyplot as plt

In [11]:
# fig, axs = plt.subplots(2, 2, figsize=(10, 8))

# for i, adj in enumerate(adjectives):
#     ax = axs[i // 2, i % 2]
#     ax.plot(embedding_by_adj[adj].values(), label=adj, color='blue')
#     ax.plot(embedding_by_adj['default'].values(), label='default', color='red')
#     ax.set_title(adj)
#     ax.legend()

# plt.tight_layout()
# plt.show()

In [12]:
# Flatten `differences` into one list of difference vectors per adjective,
# ordered like `class_names`.
# The previous version appended the whole `differences` dict on every
# iteration, so each list held repeated references to the same nested dict
# instead of vectors.
adj_differences = {}
for adj in adjectives:
    adj_differences[adj] = [
        differences[adj][class_label] for class_label in class_names
    ]

In [14]:
# import pca
from sklearn.decomposition import PCA