```
Before going through this notebook, please start the Milvus server by running the `docker-compose up -d` command.
```

In [4]:
import os
from pathlib import Path

import numpy as np
import requests
from dotenv import load_dotenv

# Load the development env file that sits one directory above this notebook.
dotenv_path = Path('../.env.dev')
load_dotenv(dotenv_path=dotenv_path)

# Look both service ports up first, then validate them, so a missing entry
# stops the notebook here with a clear message instead of failing later.
CLIP_PORT, MILVUS_PORT = (
    os.environ.get(var_name) for var_name in ("CLIP_PORT", "MILVUS_PORT")
)

assert CLIP_PORT is not None, "CLIP_PORT is not set"
assert MILVUS_PORT is not None, "MILVUS_PORT is not set"

from pysearch.milvus import Milvus2Processor as MilvusProcessor

# Settings handed to the Milvus processor when it is constructed.
config = dict(
    # Global config
    HOST="0.0.0.0",
    PORT=MILVUS_PORT,  # read from the environment, so this is a string
    INDEX="image_fact",
    RETURN_SIZE=10,
    CACHE_DIR=".cache/",
    # Milvus config
    DIMENSION=768,
)

In [5]:
# Build the processor from `config`. `autoload_collection=False` is passed explicitly;
# presumably this skips loading the collection at construction time — confirm in pysearch.
proc = MilvusProcessor(config, autoload_collection=False)
# Last expression of the cell, so the summary returned by `info()` is displayed below.
proc.info()

{'name': 'image_fact',
 'description': 'Pysearch collection',
 'num_entities': 0,
 'collections': ['test_index', 'lsc23_full_db', 'image_fact', 'test'],
 'details': [{'metric_type': 'L2',
   'index_type': 'IVF_FLAT',
   'params': {'nlist': 2048}}]}

## Prepare DB

In [6]:
import pandas as pd 
from tqdm import tqdm 
# The first CSV column is the row index written out when the file was saved
# (it appears as "Unnamed: 0" when read as data), so use it as the index
# rather than carrying it around as a meaningless data column.
df = pd.read_csv('../data/fact_embs.csv', index_col=0)
# Peek at a single row to check the columns without dumping the whole frame.
df.head(1)

Unnamed: 0,url,claim,content,top_image,index,embeddings
0,https://leadstories.com/hoax-alert/2022/07/fac...,NASA used images of Devon Island to fake photo...,Analog Mission Did NASA use the landscape of a...,https://leadstories.com/caption_3479233.jpg,https://leadstories.com/hoax-alert/2022/07/fac...,"[-0.022705078125, -0.056243896484375, -0.01136..."


In [18]:
import json


def _parse_embedding(value):
    """Return the embedding as a Python list.

    The CSV stores each embedding as a JSON string. Values that are no longer
    strings are returned unchanged, so re-running this cell does not raise a
    TypeError from calling `json.loads` on an already-parsed list.
    """
    return json.loads(value) if isinstance(value, str) else value


# Drop rows with any missing field, then decode the JSON-encoded embeddings.
df = df.dropna()
df['embeddings'] = df['embeddings'].apply(_parse_embedding)
# Show only the first rows; the full frame can be large.
df.head()

Unnamed: 0,url,claim,content,top_image,index,embeddings
0,https://leadstories.com/hoax-alert/2022/07/fac...,NASA used images of Devon Island to fake photo...,Analog Mission Did NASA use the landscape of a...,https://leadstories.com/caption_3479233.jpg,https://leadstories.com/hoax-alert/2022/07/fac...,"[-0.022705078125, -0.056243896484375, -0.01136..."
1,https://www.boomlive.in/fact-check/no-this-is-...,Photo shows image of death certificate with PM...,An image of a provisional certificate of COVID...,https://www.boomlive.in/h-upload/2021/04/22/94...,https://www.boomlive.in/fact-check/no-this-is-...,"[0.009429931640625, 0.006649017333984375, -0.0..."


In [19]:
# Pull each column out in one step instead of walking the frame row by row
# with `iterrows()` twice; the resulting arrays are the same.
features = np.array(df['embeddings'].tolist())
# The URL of each row is used as its document id.
ids = np.array(df['url'].tolist())

In [24]:
# Sanity check: first document id, then the shape of the embedding matrix,
# each on its own line.
print(ids[0], features.shape, sep="\n")

https://leadstories.com/hoax-alert/2022/07/fact-check-nasa-did-not-represent-devon-island-canada-as-mars.html
(2, 768)


In [25]:
# Send the embedding matrix and the matching URL ids to the processor for indexing.
# NOTE(review): re-running this cell presumably inserts the same rows again —
# confirm whether pysearch de-duplicates on id.
proc.index_list_document(features, ids)

(insert count: 2, delete count: 0, upsert count: 0, timestamp: 445979445836120065, success count: 2, err count: 0): 100%|██████████| 1/1 [00:00<00:00, 66.90it/s]


Function update_list_document elapsed time: 0:00:00.025549


In [26]:
# Dummy query: one constant vector (every component is 7.0), used only to
# exercise the search path. Its width comes from config["DIMENSION"] so it
# cannot drift from the vector size configured for the collection.
query = np.full((1, config["DIMENSION"]), 7.0)
results = proc.search(query, top_k=3)
# Last expression of the cell, so the search result is displayed below.
results

Function search elapsed time: 0:00:00.275259


(['https://leadstories.com/hoax-alert/2022/07/fact-check-nasa-did-not-represent-devon-island-canada-as-mars.html', 'https://www.boomlive.in/fact-check/no-this-is-not-a-death-certificate-with-pm-modis-image-on-it-12868'],
 [37621.0390625, 37659.984375])

In [27]:
# Same query as before, this time passing a `filter` list that holds a single
# document id (the first indexed URL).
results = proc.search(
    query,
    top_k=10,
    filter=[
        "https://leadstories.com/hoax-alert/2022/07/fact-check-nasa-did-not-represent-devon-island-canada-as-mars.html",
    ],
)
results

Function search elapsed time: 0:00:00.153989


(['https://leadstories.com/hoax-alert/2022/07/fact-check-nasa-did-not-represent-devon-island-canada-as-mars.html'],
 [37621.0390625])