In [1]:
import json
import typing as t

# data prep
import pandas as pd
import numpy as np

# for creating image vector embeddings
from PIL import Image
from img2vec_pytorch import Img2Vec

# for creating semantic (text-based) vector embeddings
from sentence_transformers import SentenceTransformer



In [2]:
import os

os.getcwd()

'/Users/robert.shelton/Documents/redis-product-search/data'

In [3]:
# load in data and clean data types and drop null rows
metadata = pd.read_csv("./product-images-sm/styles.csv", on_bad_lines='skip')
metadata.dropna(inplace=True)
metadata["year"] = metadata["year"].astype(int)
metadata.info()

<class 'pandas.core.frame.DataFrame'>
Index: 44077 entries, 0 to 44423
Data columns (total 10 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   id                  44077 non-null  int64 
 1   gender              44077 non-null  object
 2   masterCategory      44077 non-null  object
 3   subCategory         44077 non-null  object
 4   articleType         44077 non-null  object
 5   baseColour          44077 non-null  object
 6   season              44077 non-null  object
 7   year                44077 non-null  int64 
 8   usage               44077 non-null  object
 9   productDisplayName  44077 non-null  object
dtypes: int64(2), object(8)
memory usage: 3.7+ MB


In [4]:
metadata["product_text"] = metadata.apply(lambda row: f"name {row['productDisplayName']} category {row['masterCategory']} subcategory {row['subCategory']} color {row['baseColour']} gender {row['gender']}".lower(), axis=1)
metadata.rename({"id":"product_id"}, inplace=True, axis=1)

metadata.info()


<class 'pandas.core.frame.DataFrame'>
Index: 44077 entries, 0 to 44423
Data columns (total 11 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   product_id          44077 non-null  int64 
 1   gender              44077 non-null  object
 2   masterCategory      44077 non-null  object
 3   subCategory         44077 non-null  object
 4   articleType         44077 non-null  object
 5   baseColour          44077 non-null  object
 6   season              44077 non-null  object
 7   year                44077 non-null  int64 
 8   usage               44077 non-null  object
 9   productDisplayName  44077 non-null  object
 10  product_text        44077 non-null  object
dtypes: int64(2), object(9)
memory usage: 4.0+ MB


In [5]:
# check out one of the texts we will use to create semantic embeddings
metadata["product_text"][0]

'name turtle check men navy blue shirt category apparel subcategory topwear color navy blue gender men'

In [6]:
# Resnet-18 to create image embeddings
img2vec = Img2Vec(cuda=False)

# bert variant to create text embeddings
model = SentenceTransformer('sentence-transformers/all-distilroberta-v1')

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /Users/robert.shelton/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:03<00:00, 15.2MB/s]


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.3k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/653 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/328M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/333 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [33]:
def get_batch(seq, size):
    return (seq[pos:pos + size] for pos in range(0, len(seq), size))

def generate_image_vectors(products, image_base_path, batch_size=1000):
    output_dict={}

    for batch in get_batch(products, batch_size):
        product_ids = batch['product_id'].values.tolist()
        image_filenames = [image_base_path + "/" + str(_id) + ".jpg" for _id in product_ids]
        images=[]
        converted=[]

        for img_path, _id in zip(image_filenames, product_ids):
            try:
                img = Image.open(img_path).convert('RGB')
                img = img.resize((224, 224))
                images.append(img)
                converted.append(_id)
            except:
                #unable_to_convert -> skip to the next image
                continue

        #Generate vectors for all images in this batch
        vec_list = img2vec.get_vec(images)

        #update the dictionary to be returned
        batch_dict= dict(zip(converted, vec_list))
        output_dict.update(batch_dict)
        print(f"Processed {str(batch_size)} product images")

    return output_dict

def generate_text_vectors(products_df):
    text_vectors = {}
    # generate text vector
    for index, row in products_df.iterrows():
        text_vector = model.encode(row["product_text"])
        text_vectors[row["product_id"]] = text_vector.astype(np.float32)
        if index % 1000 == 0:
            print(f"Processed {str(index)} product text fields")
    return text_vectors

# combine into a single json file
def combine_vector_dicts(txt_vectors, img_vectors, products):
    product_vectors = []
    for _, row in products.iterrows():
        try:
            _id = row["product_id"]
            text_vector = txt_vectors[_id].tolist()
            img_vector = img_vectors[_id].tolist()
            vector_dict = {
                "text_vector": text_vector,
                "img_vector": img_vector,
                "product_id": _id
            }
            product_vectors.append(vector_dict)
        except KeyError:
            continue
    return product_vectors

def write_product_vector_json(vector_dict):
    product_vector_json = json.dumps(vector_dict)
    with open("./product_vectors.json", "w") as f:
        f.write(product_vector_json)

def write_product_metadata_json(metadata_df):
    products = []
    for _, row in metadata_df.iterrows():
        product = {
            "product_id": row["product_id"],
            # create a text based representation to create a semantic embedding with
            "product_metadata": {
                "name": row["productDisplayName"],
                "gender": row["gender"],
                "master_category": row["masterCategory"],
                "sub_category": row["subCategory"],
                "article_type": row["articleType"],
                "base_color": row["baseColour"],
                "season": row["season"],
                "year": row["year"],
                "usage": row["usage"]
            }
        }
        products.append(product)

    products_json = json.dumps(products)
    with open("./product_metadata.json", "w") as f:
        f.write(products_json)
    return products


In [29]:
data_path = "./product-images-sm/images"
num_products = 1000
image_vectors = generate_image_vectors(metadata[:num_products], data_path, batch_size=1000)
text_vectors = generate_text_vectors(metadata[:num_products])
vector_dict = combine_vector_dicts(text_vectors, image_vectors, metadata)
write_product_vector_json(vector_dict)
products_meta = write_product_metadata_json(metadata[:num_products])



KeyboardInterrupt: 

In [34]:
products_meta = write_product_metadata_json(metadata[:num_products])
[v["product_id"] for v in vector_dict] == list(metadata["product_id"][:1000])

True

In [35]:
products_meta

[{'product_id': 15970,
  'product_metadata': {'name': 'Turtle Check Men Navy Blue Shirt',
   'gender': 'Men',
   'master_category': 'Apparel',
   'sub_category': 'Topwear',
   'article_type': 'Shirts',
   'base_color': 'Navy Blue',
   'season': 'Fall',
   'year': 2011,
   'usage': 'Casual'}},
 {'product_id': 39386,
  'product_metadata': {'name': 'Peter England Men Party Blue Jeans',
   'gender': 'Men',
   'master_category': 'Apparel',
   'sub_category': 'Bottomwear',
   'article_type': 'Jeans',
   'base_color': 'Blue',
   'season': 'Summer',
   'year': 2012,
   'usage': 'Casual'}},
 {'product_id': 59263,
  'product_metadata': {'name': 'Titan Women Silver Watch',
   'gender': 'Women',
   'master_category': 'Accessories',
   'sub_category': 'Watches',
   'article_type': 'Watches',
   'base_color': 'Silver',
   'season': 'Winter',
   'year': 2016,
   'usage': 'Casual'}},
 {'product_id': 21379,
  'product_metadata': {'name': 'Manchester United Men Solid Black Track Pants',
   'gender': 'Me

In [36]:
merged = [vd | md for vd, md in zip(vector_dict, products_meta) if vd["product_id"] == md["product_id"]]

In [37]:
merged[0].keys()

dict_keys(['text_vector', 'img_vector', 'product_id', 'product_metadata'])

In [38]:
with open("./products.json", "w") as f:
    json.dump(merged, f)

In [2]:
import os
os.getcwd()

'/Users/robert.shelton/Documents/redis-product-search/data'

In [4]:
from redis.asyncio import Redis
from redisvl.schema import IndexSchema
from redisvl.index import AsyncSearchIndex

dir_path = "../backend/productsearch/db/schema"
schema = IndexSchema.from_yaml(os.path.join(dir_path, "products.yml"))
client = Redis.from_url("redis://localhost:6379")
index = AsyncSearchIndex(schema, client)

In [5]:
await index.client.ping()

True

In [13]:
product_id = 10062
res = await index.client.hget(f"{index.schema.index.prefix}:{product_id}", "img_vector")
res

b'+.9=\x88M\xb6<\x0cl9=\x87z\xe5=\xcc=\xd4>\xd1,\xb9?)\xe3\x89>oc>>\x16X\x94?\xb0N\x1b?PqT=I\xe8\xc7<P\xb3\x1c?\x1aQ\xbf<\x8d\x1c\xc2;\xc0\xd1\x8b?\xcei\xca>\x00\x00\x00\x00\x87l\xa4>^\xa4b>s\x15\xc6=n\xb3L?/\x98N@\x1b\xd7\xe5?9\xa6\r>b\xfa\xce>r2\x12?\x82\x7f\x82@\xcc\xfas;\xf7\x1bJ?\xea\xa7\x8d?\xf6J\x05=\xff-\x94>}\xb3\xdc?V\x89@@9\x8dV?oy)?A\xaf_>\x81\xff\x1f?p\x03\xd5>\x1d\xd8\x86?\x99\x15\xb7=\xd5~n=\xb3\xb1\xb0?\xa6\xc2\r@<\xcd\x7f=\x99\x1b\x90>\xd0Z\x07=\x7f\x8c6?\xe6E\x08?\x92;B@\x85\x96 ?\xee\x97\xc0>o\xf2Z?4\x99\x99?\x9ev\xcc>MF\xd1>8@\x11@\xc0}\xbe>\x9a\xb4.?\xfb\x8aD=s\xcc\xa8?\xa1N\xb7?H\xb6\xa6=\x94&\x89?kq\x1d>\xd6\xf3\x99=E\x8a\xe0>m* >\x12\x97>?\x95\x02\xed>[\x07\xce?\x9fkL<\xa3\x89\xc4>\xad\xf1\x17?G\xa1y>yP\xe4>\x1f\x8b?@\x033\r>\xe2"\xc5?\x16\x15j?hU\x0e@\xf6s\x11=\x01c\xe5?\x88b\x98?\xc9\xa8\x9c=\x0e\x80E?\xaa\x7f`>G\xeem@\x05\xe7J@t\xae\x88>\xb56\x03?\x96\xe2\x82?^I\xab?G\xf9\x10@\'+\x96?\x15\xdf-?\xda\xbf\xb6?\xce\x8f<?\xce\xe6\x18;\r\xd7P>\xd4\x01\xd8>\xff\xb3\

In [12]:
index.schema.index.prefix

'product'

In [15]:
import json
with open("./products.json", "r") as f:
    products = json.load(f)

In [29]:
import pandas as pd

img_lookup = pd.read_csv("./fashion-dataset/images.csv")

img_lookup.head()

Unnamed: 0,filename,link
0,15970.jpg,http://assets.myntassets.com/v1/images/style/p...
1,39386.jpg,http://assets.myntassets.com/v1/images/style/p...
2,59263.jpg,http://assets.myntassets.com/v1/images/style/p...
3,21379.jpg,http://assets.myntassets.com/v1/images/style/p...
4,53759.jpg,http://assets.myntassets.com/v1/images/style/p...


In [38]:
for p in products:
    filename = f"{p['product_id']}.jpg"
    img_url = img_lookup[img_lookup["filename"] == filename].iloc[0]["link"]
    if not img_url:
        print("no image!")
    p["product_metadata"]["img_url"] = img_url

In [39]:
products[0]["product_metadata"]

{'name': 'Turtle Check Men Navy Blue Shirt',
 'gender': 'Men',
 'master_category': 'Apparel',
 'sub_category': 'Topwear',
 'article_type': 'Shirts',
 'base_color': 'Navy Blue',
 'season': 'Fall',
 'year': 2011,
 'usage': 'Casual',
 'img_url': 'http://assets.myntassets.com/v1/images/style/properties/7a5b82d1372a7a5c6de67ae7a314fd91_images.jpg'}

In [25]:
products[0]["product_id"]

15970

In [40]:
with open("./products.json", "w") as f:
    json.dump(products, f)

In [None]:
[
    {
        "product_id": "56670",
        "name": "Maxima Men Black Dial Watch",
        "gender": "Men",
        "category": "Accessories",
        "img_url": "http://assets.myntassets.com/v1/images/style/properties/Maxima-Men-Black-Dial-Watch_c427a05e5f1d545b2b03ad86b4960427_images.jpg",
        "text_vector": "\t\"ݪTೇӀN\u001e6Ɠ=<Ɂ<j~<\f,w\u0015=ׇ:?\r\u0015/1<|<=\u0016=O\u0015+'={=)،.H\"=_\u0011<\u001br)=S\u00019\b\u0014=%=bv<:e\b<)We|bnEt<T\u00067\u0003;G<2<!\u0006û'\"=F!BQ=ȼ:2$g:\n;8tއB9%4\bh7,\u0003\u001d<刻&\u001d[<ǣ<R\u001d=<D=-<Ά=-><\u0011d\\[<k}\u0014C<ٽ\u0015\u000ee\u0012A\u00038\u0007;ϼ\f~[\u000191!<xi;;\u0018mn:8X=\u001fkJ<G=Bb\u0016]\u0013;\u000fV<K=\\<$;h@\np;RjӳOP7<B9ʼ\u001b@S\u001c=F:\t<,\u0004i\u0018\u0005_\u0001<=Kb\u0011<{<[@<\u0010_:=9A+\u000b=Ąw</^Zrʕyo(\\P<]\u001bg\u0018~\u001a\u0003\u00119x;\f<^r\u0004<y%=\u0002\"ܼ4;G93oJa=\u0000<o{<#\u000e\u0015Rm\"\u0005=7=n=\u0001\u0007E\u0018RV^&<`5=\u0000[\u0013\nJ=VN<쁼_Cd\u0017=#HD<=<U\u0017)=\u0012jq\u0014<\u0007<a%:=aqJo<\u0017*\u0014=y4;~\u001ev\u001eV2<l5A=*T=};\u0006<w\u0012\u0013<8\u0014^0<+\u0005r=Gi\u001bM`;,<!@<TO@\u0012kѝ<\b9>\u001d\u0013:N6\u000bϓ\u001fQ\u00197 =:\u0012=\f;G;;Ev\u0016<c<E&fl4=s+r=cu1=Q\r7I*\u0018^[V\b/=g<$#%\u0004=#IЫ\u0011=5(<;(˯<%>Y#TP;|k\u0001.<Vl;| ,s<O'U5<6\u001b\u0011(:\u0016v7>=\u0010I\u0007'\n=ԀDIz\u0016\u000b=\u000ey20\u000fwb={<F\u0000;WC=%<Z=\b;8\u0006\f=$=\u000ff<S!)\nƼ/0ؼL\u001e<v\u0005ta@:4<\u0001VZ;*׻y<Gh;\u0006<9F`<k\u0019<\t\u0003ּ^<z<A\u001f<DW=za=5<x!1=\u001aɀ=\f[îm\u0006o(\\Y7*=/= \u0019=9:1<t<v\u0002R3<8q=V;Յ<&}\u000b=a\u0000=Վ\u00108w\u001b=\u0015[=\u001d_\u000f#\u0006!n\u0001/\u0018d\u000f=E,=4<[\u001ebQ=L*<\u0013nQ\r\"U<=\u000fT< ƅ;\nC:z2<7(\u000e<8<\u000fP;\u0016s=[\u0006=YV<=`<{<\u0014<\u000fb{Vw;<<+؍\u001eqhh\nK&=\u0018e\u0003=\u001fػ/\u0004GռU\u0018\u0018{購\u0005:\u0001|\u0005:>[B\u0006=xK\u0005=k0=>}o%\t<\\Ǽ^<\u0007E;WXG25=Z*B`|7˼-=\u0015\u001f=+\u0016=^<s;Xm<\u0019\u00163u)=U\u000eɔ<1=}2A7@;EoǼ\u001f\n(< \r\f=C$=;y׼}ש;&&\nz=\u0006GiŻ:\u001aN<?J=zӄ=qz=>ͺ_6Ǽ\u0017\u0013B=W\u000b:W3=\u0006\u0010\u001cm=8=4=\u0014ɱ<&O弘\\:=>= 8k\u001e<9.\f;(pT\u0019\u0005W\u001ftp<re<&xO<ğ3O\u0000\u0001\u000b-0=q!<~\u00115<M \bxS\u00078==ɸ9\u001d<\tI\u0006=L<\u001f=w<Ȼ=!\u000f;\u0000_;fo=$\u0011&?~;]ST=h^\u0001=!,;Ml\bkzRwm?<FD\tB\u0011=E\u0012%?z<\u001dתt6_;i\u001b\u0018ت<J<\u0015<y};_<ͻ=\u0002cԺ\nS<px\u000e<`t((=ut=<b\u0013j<ܭk=07V\u001dbea<\u001ac\u0007<_M\u0017=쵌GhXüW;]?$=\u0012j<98=l7Xб6\u0011'ȼi<q.<lJ=<!N\u001b\u0006=?#<t\u0017y<mh+\u0014Ӽ`Dn<*&=$=\u000bnt0u=i@O;l;\u0011)<v\\\u001a=-xo<ٕL;K<\u0016\u000e'<xC<Jͻ\u0000H=:lZ|N<\u0011Vt<nE\u0018lb;^=9\u0012\u0015\b<9\u0004<ed_gߙ=p5:>Q<dX\u001c=@-;Y< :'$S=\u0005<\r\u001c멻Q:ڵ<~=ٷ\u0005؇:\u0013]\u0004T%<%n\u001c=\u0014ƥb<T\u00067'Y\u001f=T*-F|='=\u00159=<=\u000b!7<ΛSpR<<gwH;p\u0014<O*:=\u0017Rտ^'&;r\r<\fz6)\u0007l\u0019WüT <F։ӡ=BI^=\f@(\u001e<L=׼\u0016F==\u000e\u0012jQq|<\nb=\u0001-<rR\u001c=;PfJ\fe=<Re/,<D<I@d0=3v;]_Q<<\u000b\b=|!;o=* u.Qe}0\u0003h5<x<k\u0013\u0019\u0003<\u0017<\b|;ZK<\f=Â<O\u000e\u001c>\u0003=c=\u001f-M8z8!=4\u001f5<t:<Q<\u000b_a\u000e0ZY7=ˎ\r;_NrD<,\u000fFT\u001698@Jm<sɇ<E;L;j\u0004;ؼLc5\u0001\u001a1<==\u0005<!߻\t\u0005\u001e?\u0016\u001d}e\u001cP(\u001c<;<d\u001d<O&=U@=r=:L<\u0002\u0002=\u0014ҫ;H9\u0004<Uh\u0018=R\u0012<|\u001c/l%;\u000e}+=\u001av>+;_<DXQ\t3\u00025=<\u0017\u001a=\u0002tA6;eJ\u0007M*=<t\u0019\u0016!\u000fW:=(\u0001=",
        "img_vector": "7?0w>bƐ?k9>G=>\u00000?*p\u0001@\u001d>w?\u0019/@c>w\"?T|?k<R?k;K?C\n==r`?\u001fƯ>9>Af?\u0010>#?k?\u001c>>\u001f:?\t˓??\u001b?#?G>\u0012\u0010>b?ڛ?J\u0011?%\r<\u0011?a>\u000e\u001c?><I<\u0016}S<O;>\b>\u0014>6*?\u001c=>>?*5@\u0006?wc>7n\u0010@>\u001e=AU?\u0019?{w9>C=C>o\u001a<>(>\u0005(=p_>I=\"@\r?'<6>q\u0010@L>\u0014o<ቍ?>T?a\u0004?(o?(\u0010\u0002>[>\u00166v@I?|=!K^?\u0003<N\u0005X<fz>ʒ=u>\u00001>X>|>T\u000f?Z?j?}u?ė=\u001a?kZ<wan>D.\u000e@\u00010>\u0013\t<\r>n@7?0\u0001=R=K@\u0003G>4.>&?(?T>,@?r\u0000?߱@\u0003,>ro=\u000e>?p>=0&f?d=E?2\r??\u0005y\u0006?йd?eXM>a?9>ٽi?|g>0\u0002>\u0018?i>j?>p_?=>+p>\u001e\n?=K?=,I=R?\r\u001c>2?\b>4>Tm?S\n=\u001cR=7\u0012?zx\u001f?j?A>79>Q0@G+w=\u0005=bw@\f%@{>1F?]\u0006@݁<^m?\u0006@]?<8!p>οh>.@<8:U>\u0004:>Xx?\u000e?0\u000b>0N>2>\u0006=h-m>PT'??y?H\n\u001f=?>nO\u0012@%&<(?\u0005>\u0015q=oߩ>'?<J?\u0016>r\r>\u0011M=\b\u001c?6d?\u0000\u0000\u0000\u0000\u0001?J9P>&\u001e@Y\u0016\u001a?=]}>>2H\u000f?Nj`;&>G=\u0010<?&L>\u001ar={?C\u001e>\u0001a>j?H\u0013_?\u001d:^>s>O\u0004>Z*\r>I2=,Rr?c\u000f?P=J<>ߗ=o>\u000e?\u001f\u0001>uF?\u000e?\u0010=K@Q\\>#,<\u0003V>*<f;*?q̍@#K>qA?H?>'s?=\u0005\u0000>@f?\u0007ϡ=/G?Z@>G'=;?PZ@\u0007!=aS?\u0016!>\n\u001a>\u0019Y>|qO?\u0012'?=\u000f@e=>,?`?$?J]>Yj?>\u0001\f\r@3h?f?x<?\u000e<?>s>@%>a\u000b>M?ceW?'j?h>)=73\u0005?Y?c>\u0005>s\u0005!?7&;?e\t?c\u0010>p@{.?\u0002>W;Po>H<U_@5t=\u000e?v>4>;\u000b@[Yi>\u0005>%(C?g->:?0\f\u0010=\u0000??\u000bR\t>x??%\u0012<\u001e\u001a?=젦>d>e>SB>\u0015g<6<V\u0013???\u0006>>k>=Dm<Ƕ?\t=?\u0015-?f?3\u0007=<JC? ?Ԍ=>\u0000>[=W?=\u0015>\u0010H\n?;\u0018?6\u0002@\u0007q\u0017@NW\u000b>\u001cJ\u0004>\u001a?5?#@^,?Dx?9>\fd?S?J\u0018\u0003@\u0002@p=D?\\<w\u0013@\u0000?\u001a>\u0018>40b?3\u0002>ȧ?s4>n61?w\t?\"\u0000>\u0000\u0000\u0000\u0000ǀ=>?K?̨>?LT>/C??9\r>\u00073=>9v=G=գ6?\u0003@>>'>>١?f>3\u001f;j]?\u0018;?ry>|X?=YF?h?t\u0007=H??L?e?C>0d?J=(ka?y>R\u001d?0?\u0015E>\u001d>an?)i>@@\">\u0007e?\u0001.~??\u0016>>г>Pl>b?g#=?c\n>\u000b?N<$#>\u0019c?=oh>\r3?)<t\u001c=@9\u0004=!2>\"*>i\u0018?\u0004w?T>=\u0017#?e\u0006X>^>p2>}\u000e?\u0005_?\u001f\u0015<??> >\r?G\f<j\b?|r?ts1?=\u0005\\>?\b\u001b?Vʙ>ɣ?A>0xH=v]?\b\u0016\u0007?R\u0005=i>j\u0010<|>\u0011@N?'?\u0019G>k+|>8\u0015=&>R<kF\u0000?\rq?7=|\t<v>U\t<W=Mn>?]\u001e>2=H[\u0011@\u001c>4>^\u000e>,[>?[S?Q\u0019]>\u0000\u0000\u0000\u0000",
        "vector_distance": 0.0,
        "similarity_score": 1.0
    },
    {
        "product_id": "8110",
        "name": "Fastrack Men Economy 2 Analg Black Watch",
        "gender": "Men",
        "category": "Accessories",
        "img_url": "http://assets.myntassets.com/v1/images/style/properties/112edfec7e1965c0f64ad9eb51cac579_images.jpg",
        "text_vector": "\r\u0000\u000bjL'<\u0007\n^g\u0004<\u0016\f\u0010</=\u001e;Ux9\rOT\";k;M==0<\u001f\u0006lq$=0Ђ=X<\u00183q=\u0001\u001b=Qu[d=\r?!\u0010r;m <@\rT<;\u0006 =<%wA<ռ\u0004,\u0006=j\u000e==gyr\b|=i5m =\u0003\b'\u001a&<r;ͼЖv\"\u0019FCMN깼Z\u0004\u0005aļ\u0004ݣjp;\u0017<\u0002%n<fK=FS\\\\==4z<lB!\u0006m<}&\u0001μ\u0006P-A\u00169t\u0001+;\u001f<R뒽*v\u0004a<\u000bO\u000b=\b\u0010%.=\fqL57\u0014p<j0\u001f=CS6;F;]3=fX\u0018 =6ƚ<\u0004\u000b5=uE\u0001<=3\tG;\u00141:<(\u001f<\u000e??7a\u000b\b\u0001:<o\u0017=\u0006;h2W{AQS=\u001b<Ų;\\$\u000f=R\u0013y<bi8=$v;p:\u0002%ک\u0017;t+\u0007\u00179\u001czd\u0003\b1;?5h\u0019:[<([,B<O\u0014<Y?\u000b׍:=\u0004l\u0003=\u0006v<_L\u0002=\u000e\u0018>\u0018K<\n<lB<4sȼ፽Ӛ<iS<P2\u001eQ=539\u0019U\u0007<2;y@=\u001b<\u00137һ_j<M\u000b<<a;\u0005҃b=\u0011-\u001b:&<}\u0006|=\u001fW'ͅ\n\u0007\u0011p\u0019߼|\u0010<x<+\u0011\tu<n\u0016<9\u0000=\u0007\u0000=\u0010db{\u0019e=~I̼?μ5\u0011I=u\u001e\u001b<ŵ;H{|aXv弃퇽EkP;?\bϼ\\R<D<;!0?<!y\u000fNX=\u000b\u0018X\u0006̠\u000f=B=Yv\u001d=\u0019=~ d*GCOaW\f\nkD=\u0000\u0004a\u0018=Q5<C:<`\u0007\u001cj5=5<wJx¢yǂ<F&4\u0010\u0011wk<\u000fI^S;\u0000\u0004\u000f\rD1VZ<\u0006򼮛\u0012=#O;zI\u001f~3==-޼\u0014\u0012m==g=\u0016< =<<Z<h\u0012\u001d<ț<\u0017=<61\u0014iE;`Jme=.{Ƽ\\u<|P<Aa@\b\u0011D\u0013\u001b;<\u0010O{\u0019$f;E<<cnT;|>\n<p'=ZX~L\u000f=n4=e\n=^_U=z(G67F:\u0013\u001f<$=Qc;\\S<ǥ\u0015=\f=\b\u0012\"=p<ׁQ<z.\u000b=r\u000e=\u0007:+<oH=q\u0017s7͠μh\u0019<\b=<'-A=5\u0015`MEnż\u001e=\bv=9j<<\u0000=AA<nb<A>=,%;\u0016\u0004C=h8=T2v;m=na<\u0016<\u000bs\u0003=<K@պϋ<ҞRL= rׄK7jC=Y<\u0006=::+\u0017E39hu\u0010Euһ\u0012:ٻy]\u0012\u0004$:M\fc;|<Ř8\rz~<]޼je<X<\r%CS=\u0002\u0004'ʼ{-A0S=\u0004=m=mq<za.̝H<n?\u000e=T\tn<I8i=1. <D.mI\u0003^\u0016_H<9\u001e=y<8iּ0֌_<\u000eTU<,\u001b%;:^[=-9\u001e\u0004\"|ﻧ;\r<F=9t='=i\u0013;;\f,\u001b=3Qͺм}=\\?]~<'\u0003\u0012>o=-><[\nn1\u000f&=\u0005#I\u0018;ܣ<$\u0011K̼\u0016pVD\u000e<\u00131\u000e\u000b';<\u001d#;5Q\u00007=I;Nd\u0015Zj;~\u001d<L:$r\u000b@=뽃f<D%=\u0000\u001c=<<=֣;sJ=\u001dC]v<\u000fP;*\u0010<O\u001bpS<7=Ϡ<<\u0007\u001bkv|\b\u0004\u0018)=2X\t+}R<#\f<DMGG\u0019ʯ2\u0013dl\f;]9N\b5[<?\u0017\u0000=|f\u0018<<%<xN{6皽\u000ed;\\ʼ`u<%S<;=\u0011\u001f=ǻ4\u0013g<Ѿ=B\u0006<w4\nU3\u0006x\u0003%/<\"a;2ZS<֔n\u001bP\u000bIb׼JÂ<`<\t:V=Ugv<WS;P<#=s<x)n\f=е<مnI<\u0015㈼ݰb<\u0011[\u0017=j= \u001b=P\u0016fټxeh=q\n\u0018cZ\u001b<F;uz=\u001d\u001a7tu92ֺp;̳Ȱ;i\u0019j<b;;t\"=#a\f\u001f\u0016a\t\u0000\u000bO;<3=\u001b\u0007;ڵӻ-@={\u0006o\n(\f\u001a<,%]<u<\u0011\u0015<DT\f\u000ejM<W<,YU\u0002=\u001a\u0011=<:=őo<\u0007V\u0016=ϊ<d\u0001<+\u0017eu7=\u001a<<Վ\u000f=d<y<<z;\u001f\\:\u0010=\u0004\u0007\u0013=׊<;\u0003!u\u0019>G&vwW҅<Uⅽ;\n]\u000e<g<\u0007GʼOM=Oh0=i:R=g\u0002<\n=\u0000F'=*ɔ\u001a\u001f:=Ǿ=b;\u000e*\u000e=\u0017;5<H\b=H޻Uü\u0004<\u0004;v8\u0004-3_;|<\u001d<6Q\u0006a\u001c< =r\u0014:ϛ;Ybo\u0010w<\u0019\b\u0003=λDg-9=3{=\"Բ<\\G<\"Hw=Y\u0005w׼*=,\u001b*s<iw:B\u001b\u0018=Ƽ\u0018/<]\u001b%f\u0018=d)\u0011EБa\u0016=y!<.\u001b;-#\u000b;+<c9<\u0001:A\u0002\u00164:\u001ccr\u0005$2=\u0006<=1\t<̼f\u000fY\u001d\u001d3AԼQ<`I=lq={=\u0013U=uD6=b<{\u0016IB\u0004=$/<-;N\u000b$<ü\u001b<^^7dO<=Ȳ<\u0001O\u0007=IY\u0007/7<m6x\u0006;\u001e\u0014s<HjiD?>\u0014\u0007!\u000b=,0N\u001a\nļ0G<`$=\u001a\u0001g<Юٺ\u001a\u0013K<",
        "img_vector": "c?<2?&z>q>n\u0003;?\u0005%*@da\u0010?3?g@TT3>k?.>\bm\f?b0>sJ?sm\u0017?JCA?\t>7=lH?2X>w?\n3?\f?\\=2?(T(?Tt>B?\u0019? ?V\u001f>\u0010@Z?ŋc<]<?k(?gS@\u0001\t=\u001d\b'=\\\u001c<_ =P~>j:?G\u0011>?'>!\u0004>?}*\u001f??\u0003@\u0015=?0>??ÂR>r>p.\f?T?7>S>+?9;=e\u000e=P\u0016><4_E>X>\"@@NY=\n\u0011>B\r\r@ӆ>a<?tB>.?>I~>>z?Rו>\u001c=gw@?{>\n?%>֖>y?tZ?H\u0007>5?cD>ΐ1?J>>>'?24@\u0015?F\u0006?\u0015=F?ޣJ>?>\u0017;>\u001bM@bD?IZ>*=U>@\n\u000b?M\u001dW=\u001b7?1?>\u001ffe?7?ŭ@\u0001?4M?ߛ>?\u0001?g_<>>?&?].?[\u0016 ?\u001b\u0011?\u001b?T}>Q?\u0018=TF,>\u0005?nmg=Nxe?]#=f\"?\u0007?\u0011K!>XG?iZ?\u001d<=D<%\u0012?B>uX>>\u0004\u0018??y>\f\u0000??}?4>?\u001d<Q>a[?ʪ@\f=s\u0004<ҕ@\u0015\u001d@\u001d>\u00056=~?=aխ>%@\u0016=>\u0006?a\b?>\u0000\u0000\u0000\u0000\u001f>\u000e=z@?^\u0001B>\r>a?[Q>y?<;#?>\u0004>\u000e\u0005>\u001f\b=\u0006?A\"?D=\u001a\u001e>\u0013?*=\u001a?\"\u0017(?UW>Rz?j>\u0002\u0019>z?TM?\u0017@zF>b?6=79D='>\u0010kT=:'>Y>\u0010W?H?Z[>}x;{g?,`? ?آ>k\u0000>\u0000\u0000\u0000\u0000:T?\f=0>\n)\u001d>n>==s\n???2\u0001>/\u001f?ڦ=ȗ?gGB?c#?\f?/V?Qְ=\u0015@\u0011\u000fL?\b=\u0001^>a>\t?@<>>%?Xk?\u0003Y$=>.\u001a@ƙ>?RY?}-x<\rdq?\u0000A@<\u0003?\"3\">@\u0015>\u0000M>\u0015I>9d@?\u0004=@\u0014k\u0011?<~7?Q?p\t?k>\u0011@S=j@P?\u0010?\u0001\u000e?*:D>>\u0015Y@EX<*\bN?U\u0014`?\u0014\u0004?;>ӂ?+#=2T<?Y{>%s>Z̓?/,?\t5>\u0016R?<>\b\u0019@Ax?A.\u001c?=3Q>=\u0002a@#<\\>*F:?A>\u0007d>Z@\u001c=7>?\u0012\u0006>\f\u00013?^G>H\u0016?@\u0018?\u001fU?\u0001?$?\u0019>\u0018u>l\r?j+>;?p\u0010<U\u0001>.<<R<9@m>?-\u001c>c \u0018>\u0000=rEq>Z?\u001d>\u0001>I>?m#>fLX=?\u0006P?ڛ=>\u001b%?\u0012\u0019?\u001f)@\u0014T>?>Nj?%Q?\u001c?]?\u0006=\u0012x??H\u0005?{\u0002Z@?P?%I>|?s?\u0012{&@O@\u0015d=c\u001e@0=???V%?\u0000=Ŏ?yr>>'\u001c?-?D/>oF+>Ԗ;Ejm=p4?\u0015@QO\u0006>`?r[7?\b]>=\u00071>n>>ǻ\r?B?',@p=?D_?'?\u0007X?[F3?D>\u0000ɧ=\u0000?K4=cY=s?E\u0007>]h>g>u3?\u0002-=N\u0006@\fz?^?>[a>ֹ=5\u0000>(l>9=]\u001c?`\u0011?n\u0006\u0014?\u0019>M\u0001=\nn@Ќ>\u0010@\b?6D\n?v?z>>>$?\u0007='?\u0003>;n?DZ?ү=P?gD>^S]=8?9/>N\u0004>,>F\u0014?\u0011?\u0015a?eb?:>Ƚ>J\u001a>\u0018\u001cA?0J?b?\u0019?\u0019G?\u0014\t=b9?%u>;=\u001f?>\u0007\u001e;>ۀ>\u0004@\u001d+?Ƥ?|,>>5?Ǿ1>I5\u000f?\u001c}?)\u0012=e4?x>r\u0005>'d\u0000@t78?\u0016>>T>\u0006\n>4.?N\u000e<\"a?\u001d.?p\u001ci=0GD=f{?\"=\u0014<D>l\t?̹?eXO=(n@\u0000\u001e\u0018?l\u0004>`\u0002=\u0006?Ve?\u0012؂?\u0007==",
        "vector_distance": 0.11654239893,
        "similarity_score": 0.88345760107
    }
]

In [49]:
sample_products = [p for p in products if str(p["product_id"]) in ["56670", "8110", "4943"]]

In [50]:
with open("./test_vectors.json", "w") as f:
    json.dump(sample_products, f)

In [48]:
products[57]["product_id"]

4943