#### Saturday, February 24, 2024

Let's try to conduct a search for images of a specific person on the Match images.

In [1]:
import cv2
import numpy
import time
import csv
from glob import glob
from pathlib import Path
from statistics import mean

from towhee import pipe, ops, DataCollection
from towhee.types.image import Image
from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection, utility




First thing we need to do is to build a list of all the images in our MyMatchImages dataset.

In [5]:
MY_MATCH_IMAGES_ROOT_FOLDER = '/home/rob/Data/Documents/Github/rkaunismaa/NLP4HTML/express/express-match/public/images'

Create a function to scan the folder and return the list of files.

In [27]:
import os
import pandas as pd
import numpy as np
from PIL import Image

def scan_images(folder_path):
    image_files = []
    for root, dirs, files in os.walk(folder_path):
        for file in files:
            if file.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff')):

                fullFileName = os.path.join(root, file)
                rootFile = fullFileName.replace(folder_path+"/","")
                userId = rootFile.replace("/"+ file,"")
                
                # obtain the image properties
                img = Image.open(fullFileName)
                width, height = img.size
                format_type = img.format
                mode = img.mode
                img.close()

                image_files.append([fullFileName, rootFile, userId, file, width, height, format_type, mode])
                
    return image_files

Now scan the target folder, building the list of files to be uploaded.

In [28]:
my_match_images = scan_images(MY_MATCH_IMAGES_ROOT_FOLDER)
len(my_match_images)

19650

In [29]:
my_match_images[42]

['/home/rob/Data/Documents/Github/rkaunismaa/NLP4HTML/express/express-match/public/images/mzMSydVu3xfI_soFv4UliA2/af1eb47f-0d1d-ee11-9101-f40343f49558.jpeg',
 'mzMSydVu3xfI_soFv4UliA2/af1eb47f-0d1d-ee11-9101-f40343f49558.jpeg',
 'mzMSydVu3xfI_soFv4UliA2',
 'af1eb47f-0d1d-ee11-9101-f40343f49558.jpeg',
 900,
 1000,
 'JPEG',
 'RGB']

Create a pandas dataframe of the data, then save that dataframe as a csv file.

In [18]:
column_names = ['Full_File_Name', 'Root_File_Name', 'User_Id', 'File_Name', 'Image_Width', 'Image_Height', 'Image_Format', 'Image_Mode']

In [30]:
dfMyMatchImages = pd.DataFrame(my_match_images, columns=column_names)

In [31]:
dfMyMatchImages.head()

Unnamed: 0,Full_File_Name,Root_File_Name,User_Id,File_Name,Image_Width,Image_Height,Image_Format,Image_Mode
0,/home/rob/Data/Documents/Github/rkaunismaa/NLP...,zwuhN2JsKCYu_bHIYadOig2/3ceb7bf7-c70f-ee11-910...,zwuhN2JsKCYu_bHIYadOig2,3ceb7bf7-c70f-ee11-9101-f40343f42de0.jpeg,1000,1000,JPEG,RGB
1,/home/rob/Data/Documents/Github/rkaunismaa/NLP...,zwuhN2JsKCYu_bHIYadOig2/6baa1d11-c80f-ee11-910...,zwuhN2JsKCYu_bHIYadOig2,6baa1d11-c80f-ee11-9101-f40343f42de0.jpeg,712,711,JPEG,RGB
2,/home/rob/Data/Documents/Github/rkaunismaa/NLP...,zwuhN2JsKCYu_bHIYadOig2/a71566ad-c80f-ee11-910...,zwuhN2JsKCYu_bHIYadOig2,a71566ad-c80f-ee11-9101-f40343f42de0.jpeg,772,771,JPEG,RGB
3,/home/rob/Data/Documents/Github/rkaunismaa/NLP...,zwuhN2JsKCYu_bHIYadOig2/4da52ed3-c70f-ee11-910...,zwuhN2JsKCYu_bHIYadOig2,4da52ed3-c70f-ee11-9101-f40343f42de0.jpeg,999,1000,JPEG,RGB
4,/home/rob/Data/Documents/Github/rkaunismaa/NLP...,zwuhN2JsKCYu_bHIYadOig2/1fc57056-c80f-ee11-910...,zwuhN2JsKCYu_bHIYadOig2,1fc57056-c80f-ee11-9101-f40343f42de0.jpeg,804,804,JPEG,RGB


Now save that dataframe to a csv file.

In [34]:
my_match_images_csv_filename = "MyMatchImages.csv"
dfMyMatchImages.to_csv(my_match_images_csv_filename)

Next, let's create the embeddings for these images.

In [35]:
MODEL = 'resnet50'
DEVICE = None # if None, use default device (cuda is enabled if available)

# Milvus parameters
HOST = '127.0.0.1'
PORT = '19530'
TOPK = 10
DIM = 2048 # dimension of embedding extracted by MODEL
COLLECTION_NAME = 'My_Match_Images'
INDEX_TYPE = 'IVF_FLAT'
METRIC_TYPE = 'L2'

# path to csv (column_1 indicates image path) OR a pattern of image paths
INSERT_SRC = my_match_images_csv_filename

# target match query search image
someMatchImage = 'ko1yxtyDnBYUnshNvpaiAw2/df2d5f53-5683-ed11-90fd-f40343f49558.jpeg'
QUERY_SRC = MY_MATCH_IMAGES_ROOT_FOLDER + "/" + someMatchImage

In [37]:
# Create milvus collection (delete first if exists)
def create_milvus_collection(collection_name, dim):
    
    if utility.has_collection(collection_name):
        utility.drop_collection(collection_name)
    
    fields = [
        FieldSchema(name='path', dtype=DataType.VARCHAR, description='path to image', max_length=500, 
                    is_primary=True, auto_id=False),
        FieldSchema(name='embedding', dtype=DataType.FLOAT_VECTOR, description='image embedding vectors', dim=dim)
    ]
    schema = CollectionSchema(fields=fields, description='reverse image search')
    collection = Collection(name=collection_name, schema=schema)

    index_params = {
        'metric_type': METRIC_TYPE,
        'index_type': INDEX_TYPE,
        'params': {"nlist": 2048}
    }
    collection.create_index(field_name='embedding', index_params=index_params)
    return collection

In [39]:
# Read images
decoder = ops.image_decode('rgb').get_op()

def read_images(img_paths):
    imgs = []
    for p in img_paths:
        img = decoder(p)
        imgs.append(img)
#         imgs.append(Image(cv2.imread(p), 'RGB'))
    return imgs

In [40]:
# Embedding pipeline
p_embed = (
    pipe.input('img_path')
        .map('img_path', 'img', ops.image_decode('rgb'))
        .map('img', 'vec', ops.image_embedding.timm(model_name=MODEL, device=DEVICE))
        .map('vec', 'vec', lambda x: x / numpy.linalg.norm(x, axis=0))
)

In [41]:
to_insert = glob(INSERT_SRC)
to_test = glob(QUERY_SRC)

In [43]:
to_insert

['MyMatchImages.csv']

In [42]:
# Display embedding result, no need for implementation
p_display = p_embed.output('img_path', 'img', 'vec')

DataCollection(p_display(to_insert[0])).show()

2024-02-24 12:03:40,278 - 140526246291008 - image_decode_cv2.py-image_decode_cv2:68 - ERROR: Read image MyMatchImages.csv failed


RuntimeError: Node-image-decode-0 runs failed, error msg: Read image MyMatchImages.csv failed, Traceback (most recent call last):
  File "/home/rob/miniforge3/envs/milvus/lib/python3.11/site-packages/towhee/runtime/nodes/node.py", line 158, in _call
    return True, self._op(*inputs), None
                 ^^^^^^^^^^^^^^^^^
  File "/home/rob/.towhee/operators/towhee/image-decode/versions/main/image_decode.py", line 14, in __call__
    return self._op(image_path)
           ^^^^^^^^^^^^^^^^^^^^
  File "/home/rob/miniforge3/envs/milvus/lib/python3.11/site-packages/towhee/runtime/factory.py", line 125, in __call__
    result = self._op(*args, **kws)
             ^^^^^^^^^^^^^^^^^^^^^^
  File "/home/rob/.towhee/operators/image-decode/cv2/versions/main/image_decode_cv2.py", line 69, in __call__
    raise RuntimeError(err)
RuntimeError: Read image MyMatchImages.csv failed
, Traceback (most recent call last):
  File "/home/rob/miniforge3/envs/milvus/lib/python3.11/site-packages/towhee/runtime/nodes/node.py", line 171, in process
    self.process_step()
  File "/home/rob/miniforge3/envs/milvus/lib/python3.11/site-packages/towhee/runtime/nodes/_map.py", line 63, in process_step
    assert succ, msg
AssertionError: Read image MyMatchImages.csv failed, Traceback (most recent call last):
  File "/home/rob/miniforge3/envs/milvus/lib/python3.11/site-packages/towhee/runtime/nodes/node.py", line 158, in _call
    return True, self._op(*inputs), None
                 ^^^^^^^^^^^^^^^^^
  File "/home/rob/.towhee/operators/towhee/image-decode/versions/main/image_decode.py", line 14, in __call__
    return self._op(image_path)
           ^^^^^^^^^^^^^^^^^^^^
  File "/home/rob/miniforge3/envs/milvus/lib/python3.11/site-packages/towhee/runtime/factory.py", line 125, in __call__
    result = self._op(*args, **kws)
             ^^^^^^^^^^^^^^^^^^^^^^
  File "/home/rob/.towhee/operators/image-decode/cv2/versions/main/image_decode_cv2.py", line 69, in __call__
    raise RuntimeError(err)
RuntimeError: Read image MyMatchImages.csv failed


