In [1]:
import sys
sys.path.append('/app/')  # noqa

from jina import Document, DocumentArray, Flow, Executor, requests
from jina.types.request import Request
from pathlib import Path
import numpy as np
import pandas as pd
from tqdm.auto import tqdm
import argparse
from memlish.executors.cache import OpenAICLIPImageEncoderCache
from memlish.executors.cache import RealSBERTEncoderCache
from memlish.executors.index import FaissIndexer
import torch
import hashlib

def my_hash(s):
    """Return the MD5 digest of ``str(s)`` as a base-10 integer string.

    The argument is stringified first, so ``my_hash(123)`` and
    ``my_hash("123")`` give the same result. The text is UTF-8 encoded
    before hashing.
    """
    payload = str(s).encode('utf-8')
    raw_digest = hashlib.md5(payload).digest()
    # Reading the 16 digest bytes big-endian yields the same integer as
    # parsing the hexadecimal digest in base 16.
    as_number = int.from_bytes(raw_digest, 'big')
    return str(as_number)

from pathlib import Path
# Convenience helper patched onto Path: list a directory's direct children.
# iterdir() yields entries in arbitrary, filesystem-dependent order, so the
# result is sorted to keep listings (and anything built from them)
# reproducible across runs and machines.
Path.ls = lambda x: sorted(x.iterdir())

In [2]:
# Switch torch's multiprocessing start method to 'spawn'; force=True replaces
# any start method that was already set in this kernel.
# NOTE(review): presumably needed because the encoder configured below runs
# with device='cuda', which does not work in forked workers - confirm.
torch.multiprocessing.set_start_method('spawn', force=True) 

In [3]:
# Directory whose entries are listed into `images` and sent through the flow.
# NOTE(review): hard-coded absolute path; the notebook only runs where this
# directory exists.
IMAGE_DIR = Path("/data/imgflip/scrap_language_image_pairs_20220209/images")

In [4]:
# All direct children of IMAGE_DIR, obtained through the Path.ls helper patched
# onto Path in the first cell. Nothing is filtered here, so any non-image
# entries in the directory are included as well.
images = IMAGE_DIR.ls()

In [5]:
# Passed as `collection_name` to the image-encoder cache executor below.
JINA_LCLIP_EMBEDDING_TEMPLATE_IMAGE_COLLECTION = "02_lclip_imgflip_template_image_100k_embeddings"
# Not referenced in the visible cells.
# NOTE(review): presumably the MongoDB database backing the cache - confirm.
MONGO_EMBEDDING_DB_NAME = 'memlish_db'

In [6]:
def plot_search_results(resp: Request):
    """Print each query document's text and display its first three matches.

    For every document in ``resp.docs`` this prints the query text, a
    "Matches:" header and a dashed rule, then hands the first three entries
    of ``doc.matches`` to ``show_docs`` for rendering.
    """
    rule = '-' * 10
    for query_doc in resp.docs:
        print(f'Query text: {query_doc.text}')
        print('Matches:')
        print(rule)
        top_matches = query_doc.matches[:3]
        show_docs(top_matches)
        
def input_docs(images):
    """Lazily yield one ``Document`` per path in ``images``.

    Each document uses the file name as its id, the full path string as its
    uri, and also stores the full path string under the ``filename`` tag.
    """
    for path in images:
        full_path = str(path)
        yield Document(
            id=str(path.name),
            uri=full_path,
            tags={'filename': full_path},
        )

def show_docs(docs):
    """Display the image of every document in ``docs``, one figure each.

    Each document is asked to load its image through
    ``load_uri_to_image_blob()``; the resulting ``doc.blob`` is then drawn
    with matplotlib and shown immediately.

    Args:
        docs: iterable of documents exposing ``load_uri_to_image_blob()``
            and ``blob``.
    """
    # matplotlib is not imported anywhere else in this notebook, so without
    # this local import every call failed with NameError on `plt`.
    import matplotlib.pyplot as plt

    for doc in docs:
        doc.load_uri_to_image_blob()
        plt.imshow(doc.blob)
        plt.show()

In [11]:
# Settings nested under the "embedder_params" key of the cache configuration.
embedder_params = dict(batch_size=64, device='cuda')

# Configuration handed to the image-encoder cache executor via `uses_with`.
image_cache_params = dict(
    embedder_params=embedder_params,
    collection_name=JINA_LCLIP_EMBEDDING_TEMPLATE_IMAGE_COLLECTION,
    embedding_field_name='emb',
    megabatch_size=4096,
)

In [12]:
from jina import Flow

# Single-executor flow: the CLIP image-encoder cache, configured with
# image_cache_params.
flow_index = Flow().add(
    uses=OpenAICLIPImageEncoderCache,
    name="CLIPImageEncoderCache",
    uses_with=image_cache_params,
)
# Bare expression so the notebook renders the flow.
flow_index

In [13]:
# Start the flow, stream every image document through it in requests of
# 1000 documents, and shut it down when the block exits.
with flow_index:
    flow_index.post(
        on='',
        inputs=input_docs(images),
        request_size=1000,
        show_progress=True,
    )

           Flow@15[I]:[32m🎉 Flow is ready to use![0m                                             
	🔗 Protocol: 		[1mGRPC[0m
	🏠 Local access:	[4m[36m0.0.0.0:41639[0m
	🔒 Private network:	[4m[36m172.20.0.2:41639[0m
	🌐 Public address:	[4m[36m35.224.116.253:41639[0m[0m
[32m⠹[0m Working... [32m━[0m[32m╸[0m[2m[32m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [36m0:00:00[0m estimating... 

  0%|          | 0/1 [00:00<?, ?it/s]

[32m⠙[0m Working... [32m━━[0m[32m╸[0m[2m[32m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [36m0:00:32[0m  0.0 step/s . 

100%|██████████| 1/1 [00:32<00:00, 32.35s/it]
  0%|          | 0/1 [00:00<?, ?it/s]

[32m⠇[0m       DONE [33m━━━[0m[33m╸[0m[2m[33m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [36m0:00:59[0m  0.0 step/s [K3 steps done in 59 seconds


100%|██████████| 1/1 [00:26<00:00, 26.90s/it]
