In [4]:
%load_ext autoreload
%autoreload 2
import sys
sys.path.append("..")

import math
import random

import matplotlib.pyplot as plt
import pandas as pd
import torch
from torch.utils.data import DataLoader, Subset
from tqdm import tqdm
from transformers import DistilBertTokenizer

from src.data_processing.dataset import FlickrDataset
from src.model.model import CLIP
from src.predict import encode_image, encode_text_query, find_similar

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [5]:
# Notebook configuration, passed to CLIP and read by the test-set cell below.
#   cfg['model']['projections'] -- handed to CLIP via cfg; presumably the width of
#                                  the projection head (TODO confirm in src.model.model).
#   cfg['test']['test_subset']  -- None/0 -> use the whole test split;
#                                  an int n -> evaluate on a random sample of n test items.
cfg = {
    'model': {"projections": 768},
    'test': {"test_subset": None},
}

trained_model_path = "../model_weights/clip-epochs-10-partial-set.pt"
device = 'cuda' if torch.cuda.is_available() else 'cpu'

model = CLIP(cfg=cfg)
# This notebook only runs inference, so put dropout / normalisation layers into
# evaluation behaviour up front. This is a no-op if the helpers in src.predict
# already call model.eval() themselves.
model.eval()

# NOTE: torch.load unpickles the file, so only load checkpoints from a trusted source.
# map_location lets a checkpoint saved on GPU be restored on a CPU-only machine.
state_dict = torch.load(trained_model_path, map_location=torch.device(device))
# Kept as the last expression so the cell displays the key-matching report.
model.load_state_dict(state_dict)

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

<All keys matched successfully>

In [6]:
# Seed Python's RNG so that an optional random test subset is reproducible.
random.seed(10)

# Alternative data source, currently disabled:
# dataset = UnsplashDataset(tokenizer, "../data/unsplash/photos.tsv*")
flickr_image_dir = "../../data/flickr-dataset/Images/"
flickr_caption_file = "../../data/flickr-dataset/captions.txt"
dataset = FlickrDataset(image_folder_path=flickr_image_dir, caption_path=flickr_caption_file)
dataset_len = len(dataset)

# The test split is identified by the values in the CSV's 'index' column, which are
# used directly as positions into `dataset`. This only stays valid while the
# dataset's ordering is unchanged since the CSV was written.
test_set_df = pd.read_csv("../data/test_set.csv", index_col='index')
all_test_indices = test_set_df.index.tolist()

# A falsy subset size (None or 0) selects the whole test split; otherwise draw
# that many indices at random.
subset_size = cfg['test']['test_subset']
subset_indices = random.sample(all_test_indices, subset_size) if subset_size else all_test_indices

test_set = Subset(dataset, subset_indices)

# shuffle=False keeps batches in the same order as subset_indices.
test_dataloader = DataLoader(test_set, shuffle=False, batch_size=16)

FileNotFoundError: [Errno 2] No such file or directory: '../../data/flickr-dataset/captions.txt'

In [None]:
# Encode every test image and build `final_embedding`, a 2-D tensor with one row
# per test item: column 0 is the item's dataset index, the remaining columns are
# the image embedding returned by encode_image.
embedding_batches = []

# Gradients are never needed here; disabling autograd keeps the accumulated
# embeddings from holding on to computation graphs.
with torch.no_grad():
    with tqdm(test_dataloader) as tepoch:
        for imgs, _ in tepoch:
            encoded_images = encode_image(model, imgs)
            # Collect on the CPU so the full embedding table never has to fit in GPU memory.
            embedding_batches.append(encoded_images.to('cpu'))

if not embedding_batches:
    raise ValueError("test_dataloader produced no batches; cannot build image embeddings")

# Concatenate once at the end: linear instead of re-copying the growing tensor on
# every batch, and no hard-coded embedding width is required.
image_embeddings = torch.cat(embedding_batches, dim=0)

# Prepend the dataset index of each row. The dataloader is not shuffled, so row i
# of image_embeddings corresponds to subset_indices[i].
index_column = torch.Tensor(subset_indices).reshape((len(test_set), 1))
final_embedding = torch.cat((index_column, image_embeddings), dim=1)

  0%|          | 0/253 [00:00<?, ?it/s]

100%|██████████| 253/253 [01:17<00:00,  3.26it/s]


In [None]:
# Tokenizer for the text side of the model (pretrained DistilBERT vocabulary).
tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")

# Free-text query to search the test images with; also reused as the plot title.
text = "A man rock-climbing"
text_embedding = encode_text_query(model, text, tokenizer)

In [None]:
# Number of best-matching images to retrieve for the query.
top_k = 6

# NOTE(review): final_embedding still carries the dataset index in column 0 --
# confirm that find_similar skips that column when computing similarities.
top_k_similar = find_similar(
    final_embedding.to(device),
    text_embedding,
    top_k,
)

In [None]:
# Row positions of the best matches within final_embedding, as a plain Python list.
matched_rows = top_k_similar.cpu().numpy().tolist()
# Column 0 of final_embedding stores each row's dataset index (as a float).
image_id_column = final_embedding[:, 0]
# Look up the dataset indices of the matched rows and cast them back to ints.
top_k_image_ids = image_id_column[matched_rows].numpy().astype(int).tolist()

In [None]:
# Fetch the test-set rows for the retrieved dataset indices, in ranking order.
results = test_set_df.loc[top_k_image_ids, :]

In [None]:
# NOTE(review): these imports would be better placed in the notebook's first import cell.
import os

import numpy as np
from PIL import Image

# Load the retrieved images into one array `imgs` of shape (n_images, 224, 224, 3),
# dtype uint8, ready for plotting.
image_dir = "../../data/flickr-dataset/Images"
image_size = (224, 224)

image_files = results['image'].tolist()

resized_images = []
for image_file in image_files:
    # The context manager closes the underlying file handle once the pixels are copied.
    with Image.open(os.path.join(image_dir, image_file)) as img:
        # convert("RGB") guarantees exactly three channels, so grayscale or RGBA
        # files cannot break the (224, 224, 3) layout.
        resized_images.append(np.array(img.convert("RGB").resize(image_size)))

# Stack once at the end instead of growing the array with vstack on every
# iteration; this also keeps the native uint8 pixel dtype.
if resized_images:
    imgs = np.stack(resized_images)
else:
    imgs = np.empty((0, *image_size, 3), dtype=np.uint8)

In [None]:
# NOTE(review): this import would be better placed in the notebook's first import cell.
import plotly.express as px

# Show the retrieved images as a facet grid, one facet per image (axis 0 of imgs).
# Aim for two rows: facet_col_wrap must be a positive integer, so round up
# (handles odd top_k) and never go below one column (handles top_k == 1).
n_facet_cols = max(1, math.ceil(top_k / 2))
fig = px.imshow(
    np.asarray(imgs),
    facet_col=0,
    facet_col_wrap=n_facet_cols,
    width=900,
    height=600,
    title=f"Prompt: {text}",
)
fig.show()