# Towhee image search

In [1]:
import random
import towhee
import time
from towhee import Entity
from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection, utility

# connect to milvus
connections.connect(host='localhost', port='19530')

In [2]:
# create reverse_image_search collection and build index
def create_milvus_collection(collection_name, dim):
    fields = [
    FieldSchema(name='id', dtype=DataType.INT64, descrition='ids', is_primary=True, auto_id=True),
    FieldSchema(name='path', dtype=DataType.INT64, descrition='image path'),
    FieldSchema(name='embedding', dtype=DataType.FLOAT_VECTOR, descrition='embedding vectors', dim=dim)
    ]
    schema = CollectionSchema(fields=fields, description='reverse image search')
    collection = Collection(name=collection_name, schema=schema)

    # create IVF_FLAT index for collection.
    index_params = {
        'metric_type':'L2',
        'index_type':"IVF_FLAT",
        'params':{"nlist":2048}
    }
    collection.create_index(field_name="embedding", index_params=index_params)
    return collection


collection = create_milvus_collection('reverse_image_search', 2048)

In [3]:
# insert data into reverse_image_search collection
time1 = time.time()
mr = (
    towhee.glob['path']('/Users/chenshiyu/workspace/data/pic/coco-images/*.jpg', 
                        '/Users/chenshiyu/workspace/data/pic/coco-images/*png')
          .image_decode['path', 'img']()
          .image_embedding.timm['img', 'vec'](model_name='resnet50')
          .runas_op['path', 'hash_path'](func=lambda path: abs(hash(path)) % (10 ** 8)) # delete when support String in Milvus2.1
          .to_milvus['hash_path', 'vec'](collection=collection, batch=128)
)
print(time.time()-time1)
print(mr)

39.71364188194275
(insert count: 198, delete count: 0, upsert count: 0, timestamp: 432699600978313218)


In [4]:
search_args = dict(
    param={"params": {"nprobe": 10}},
    output_fields=['path'],
    limit=10
)
# search in milvus and show the results
res = (
    towhee.glob['path']('/Users/chenshiyu/workspace/data/pic/test/*.jpg')
      .image_decode['path', 'img']()
      .image_embedding.timm['img', 'vec'](model_name='resnet50')
      .milvus_search['vec', 'result'](collection=collection, **search_args)
      .show()
)

path,img,vec,result
/Users/chenshiyu/workspace/data/...,,"[0.57396334, 0.18509091, 0.44970408, ...] shape=(2048,)","[{""id"": 432676697476957959, ""score"": 0.0, ""path"": 27801468},{""id"": 432676697476957906, ""score"": 33.61406326293945, ""path"": 50861878},{""id"": 432676697476957923, ""score"": 42.41242218017578, ""path"": 62983539},{""id"": 432676697476958003, ""score"": 46.91796112060547, ""path"": 42140892},...] len=10"
/Users/chenshiyu/workspace/data/...,,"[0.35551175, 0.010351829, 0.033109855, ...] shape=(2048,)","[{""id"": 432676697476957980, ""score"": 0.0, ""path"": 91676887},{""id"": 432676697476957920, ""score"": 17.603469848632812, ""path"": 76885468},{""id"": 432676697476957939, ""score"": 17.945392608642578, ""path"": 62790225},{""id"": 432676697476957929, ""score"": 18.28226089477539, ""path"": 56458057},...] len=10"
/Users/chenshiyu/workspace/data/...,,"[0.010864214, 0.0, 0.24841177, ...] shape=(2048,)","[{""id"": 432676697476958027, ""score"": 18.915029525756836, ""path"": 86667502},{""id"": 432676697476957995, ""score"": 22.226421356201172, ""path"": 68799297},{""id"": 432676697476957859, ""score"": 26.136245727539062, ""path"": 50850464},{""id"": 432676697476957943, ""score"": 37.03239440917969, ""path"": 14902195},...] len=10"
/Users/chenshiyu/workspace/data/...,,"[0.30699396, 0.084877014, 0.04200204, ...] shape=(2048,)","[{""id"": 432676697476958014, ""score"": 0.0, ""path"": 40014767},{""id"": 432676697476958027, ""score"": 66.16842651367188, ""path"": 86667502},{""id"": 432676697476957997, ""score"": 66.945068359375, ""path"": 91510270},{""id"": 432676697476957943, ""score"": 68.54490661621094, ""path"": 14902195},...] len=10"


In [5]:
(
    towhee.glob['path']('/Users/chenshiyu/workspace/data/pic/test/*.jpg')
      .image_decode['path', 'img']()
      .image_embedding.timm['img', 'vec'](model_name='resnet50')
      .milvus_search['vec', 'result'](collection=collection, **search_args)
      .runas_op['result', 'result'](func=lambda res: [x.path for x in res])
      .select['img', 'result']()
      .show()
)

img,result
,"[27801468,50861878,62983539,42140892,...] len=10"
,"[91676887,76885468,62790225,56458057,...] len=10"
,"[86667502,68799297,50850464,14902195,...] len=10"
,"[40014767,86667502,91510270,14902195,...] len=10"


In [6]:
# collection = Collection('reverse_image_search')
# print(collection.num_entities)
# collection.drop()

# Advanced Test

In [7]:
# expetion safe
(
towhee.glob['path']('/Users/chenshiyu/workspace/data/pic/test/*.jpg')
      .exception_safe()
      .image_decode['path', 'img']()
      .image_embedding.timm['img', 'vec'](model_name='resnet50')
      .drop_empty()
      .to_list()
)

[<Entity dict_keys(['path', 'img', 'vec'])>,
 <Entity dict_keys(['path', 'img', 'vec'])>,
 <Entity dict_keys(['path', 'img', 'vec'])>,
 <Entity dict_keys(['path', 'img', 'vec'])>]

In [8]:
# parallel execute
time1 = time.time()
mr = (
    towhee.glob['path']('/Users/chenshiyu/workspace/data/pic/coco-images/*.jpg', 
                        '/Users/chenshiyu/workspace/data/pic/coco-images/*png')
          .set_parallel(4)
          .image_decode['path', 'img']()
          .image_embedding.timm['img', 'vec'](model_name='resnet50')
          .runas_op['path', 'hash_path'](func=lambda path: abs(hash(path)) % (10 ** 8))
          .to_milvus['hash_path', 'vec'](collection=collection, batch=128)
)
print(time.time()-time1)
print(mr)

13.999315023422241
(insert count: 198, delete count: 0, upsert count: 0, timestamp: 432699605937029121)


In [9]:
# metric report
