In [3]:
import os
# import annoy
import re
import requests
import numpy as np
import pandas as pd
from PIL import Image
from tqdm import tqdm
from typing import List, Tuple

from inference import *
from visualization import *

In [4]:
generator = FashionEmbeddingGenerator()

def img_folder2csv(category, data_path, save_path, file_name, batch_size: int=256):
    df = pd.DataFrame(columns=['id', 'embed', 'category'])
    file_paths = os.listdir(data_path)

    for idx in tqdm(range(0, len(file_paths), batch_size)):
        batch = file_paths[idx:idx + batch_size]
        ids = list(map(lambda x: re.sub(r'\.jpg$', '', x), batch))
        images = list(map(lambda x: Image.open(os.path.join(data_path, x)), batch))
        embeds = generator.img2embed(images)
        df = pd.concat([df, pd.DataFrame.from_dict({'id': ids, 'embed': embeds})], axis=0)

    df['category'] = category
    df.to_csv(os.path.join(save_path, file_name), index=False)

Downloading (…)lve/main/config.json:   0%|          | 0.00/4.46k [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


Downloading pytorch_model.bin:   0%|          | 0.00/605M [00:00<?, ?B/s]

Some weights of the model checkpoint at patrickjohncyh/fashion-clip were not used when initializing CLIPVisionModelWithProjection: ['text_model.encoder.layers.1.self_attn.k_proj.weight', 'text_model.encoder.layers.0.mlp.fc1.weight', 'text_model.encoder.layers.11.self_attn.k_proj.weight', 'text_model.encoder.layers.3.self_attn.k_proj.bias', 'text_model.encoder.layers.2.self_attn.v_proj.weight', 'text_model.encoder.layers.8.self_attn.k_proj.weight', 'text_model.encoder.layers.7.mlp.fc1.weight', 'text_model.encoder.layers.6.self_attn.v_proj.weight', 'text_model.encoder.layers.7.self_attn.v_proj.bias', 'text_model.encoder.layers.5.layer_norm1.bias', 'text_model.encoder.layers.8.mlp.fc2.weight', 'text_model.encoder.layers.0.self_attn.q_proj.weight', 'text_model.encoder.layers.0.self_attn.k_proj.weight', 'text_model.encoder.layers.1.self_attn.q_proj.bias', 'text_model.encoder.layers.2.layer_norm2.weight', 'text_model.encoder.layers.5.mlp.fc2.bias', 'text_model.encoder.layers.11.layer_norm1.b

Downloading (…)rocessor_config.json:   0%|          | 0.00/316 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/568 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/862k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/525k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/2.22M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/389 [00:00<?, ?B/s]

In [3]:
category = 'top'
data_path = f'../data/FashionVCdata/{category}'
save_path = '../data/FashionVCdata/'
file_name = f'{category}_embeds.csv'
img_folder2csv(category, data_path, save_path, file_name)

100%|██████████| 59/59 [07:24<00:00,  7.53s/it]


In [4]:
category = 'bottom'
data_path = f'../data/FashionVCdata/{category}'
save_path = '../data/FashionVCdata/'
file_name = f'{category}_embeds.csv'
img_folder2csv(category, data_path, save_path, file_name)

100%|██████████| 54/54 [06:22<00:00,  7.07s/it]


In [6]:
df = pd.read_csv(os.path.join(save_path, file_name))

query_idx = 512
query = df.iloc[query_idx]
df['sim'] = df.apply(lambda x: torch.nn.CosineSimilarity(dim=0)(torch.Tensor(x['embed']), torch.Tensor(query['embed'])), axis=1)

n=10

print('query')
query_file_name = str(query['id']) + '.jpg'
show_single_image(data_path, query_file_name)

print('most_similar G.T')
most_similar = df.sort_values('sim', ascending=False, ignore_index=True)[1:n + 1]
file_names = list(map(lambda x: str(x) + '.jpg', most_similar['id'].tolist()))
show_top_n_image(data_path, file_names, most_similar['sim'].tolist(), n=n)

print('least_similar G.T')
least_similar = df.sort_values('sim', ascending=True, ignore_index=True)[0:n]
file_names = list(map(lambda x: str(x) + '.jpg', least_similar['id'].tolist()))
show_top_n_image(data_path, file_names, least_similar['sim'].tolist(), n=n)

TypeError: new(): invalid data type 'str'

In [None]:
# Annoy Initialization
annoy_index = annoy.AnnoyIndex(f=512, metric='angular') # 512 is embed size of CLIP

for i, row in df.iterrows():
    annoy_index.add_item(i, np.array(row['embed']))

annoy_index.build(n_trees=10) # More n_tree helps improve accuracy
annoy_index.save('test.ann')

In [None]:
# Embedding Vector of Query
query_embed = np.array(query['embed'])
# It outputs 'index' and 'distance' (the smaller, the more similar) for n similar vectors.
get_nns_list = annoy_index.get_nns_by_vector(vector=query_embed, n=n, include_distances=True)

print('most_similar ANN')
file_names = list(map(lambda x: str(df.iloc[x]['id']) + '.jpg', get_nns_list[0]))
show_top_n_image(data_path, file_names, get_nns_list[1], n=n)