```
Before going through this notebook, please start the Elasticsearch server by running the `docker-compose up -d` command.
```

In [1]:
import requests
import os
from datetime import datetime
from dotenv import load_dotenv
from pathlib import Path
import numpy as np
import pandas as pd 

# Load the development env file so the Elasticsearch settings below are
# available through the process environment.
dotenv_path = Path('../.env.dev')
load_dotenv(dotenv_path=dotenv_path)

# os.getenv yields None for a variable that is absent.
ELASTIC_PORT = os.getenv("ELASTIC_PORT")
ELASTIC_USERNAME = os.getenv("ELASTIC_USERNAME")
ELASTIC_PASSWORD = os.getenv("ELASTIC_PASSWORD")

# Stop right here, naming the missing variable, if any setting is absent.
assert ELASTIC_PORT is not None, "ELASTIC_PORT is not set"
assert ELASTIC_USERNAME is not None, "ELASTIC_USERNAME is not set"
assert ELASTIC_PASSWORD is not None, "ELASTIC_PASSWORD is not set"

# Global settings dictionary passed to ElasticProcessor further down.
# INDEX starts as None and is assigned just before the processor is created.
# NOTE(review): "0.0.0.0" is normally a bind-all address; as a client target
# it may not be reachable on every OS — confirm, or consider "127.0.0.1".
config = dict(
    HOST="0.0.0.0",
    PORT=ELASTIC_PORT,
    USERNAME=ELASTIC_USERNAME,
    PASSWORD=ELASTIC_PASSWORD,
    INDEX=None,
    RETURN_SIZE=10,
    CACHE_DIR=".cache/",
    DIMENSION=2,
)

## Prepare DB

In [3]:
import pandas as pd 
from tqdm import tqdm 
# Read the claims CSV into a DataFrame and preview only its first row.
fact_csv_path = '../data/fact_30k.csv'
df = pd.read_csv(fact_csv_path)
df.head(n=1)

Unnamed: 0,url,claim,content,top_image,index
0,https://leadstories.com/hoax-alert/2022/07/fac...,NASA used images of Devon Island to fake photo...,Analog Mission Did NASA use the landscape of a...,https://leadstories.com/caption_3479233.jpg,https://leadstories.com/hoax-alert/2022/07/fac...


In [4]:
# Swap every missing value for an empty string, then show summary statistics.
df = df.fillna(value='')
df.describe()

Unnamed: 0,url,claim,content,top_image,index
count,28852,28852,28852,28852,28852
unique,28852,28279,26293,27358,28852
top,https://leadstories.com/hoax-alert/2018/05/fak...,Agencia Tributaria: Os ha calificado para un r...,Alt News Alternative News and Views in the Pos...,https://newsmobile.in/image,https://leadstories.com/hoax-alert/2018/05/fak...
freq,1,5,358,495,1


## Create index in Elasticsearch

In [5]:
from pysearch.elastic import ElasticProcessor
# Point the shared config at the 'factcheck' index, then build the processor
# from that config.
config.update(INDEX='factcheck')
proc = ElasticProcessor(config)

Connected to Elasticsearch node


In [6]:
# Show the index names the processor reports (displayed as a dict_keys view).
proc.available_indices()

dict_keys(['factcheck'])

In [6]:
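# Uncomment to call kill on the 'factcheck' index — presumably this deletes it,
# so confirm against pysearch before running.
# proc.kill('factcheck')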

In [7]:
# Index mapping passed to proc.index_dataframe: every field is declared with
# type "text". The 'index' field is required for pysearch.
df_structure = {
    "mappings": {
        "properties": {
            field_name: {"type": "text"}
            for field_name in ("index", "url", "claim", "content", "top_image")
        }
    }
}

In [8]:
# Index the rows of df through the processor, passing df_structure as the
# mapping definition.
proc.index_dataframe(df, df_structure)

100%|██████████| 28852/28852 [00:20<00:00, 1441.20it/s]


In [9]:
import rich
# Pretty-print whatever proc.info() returns, using rich for formatting.
rich.print(proc.info())

In [10]:
# Run a text search for 'Obama', asking for the top 3 hits.
proc.search(text_query='Obama',topk=3)

Function run elapsed time: 0:00:00.000014


[{'_index': 'factcheck',
  '_id': 'https://factly.in/photoshopped-image-shared-as-obama-holding-the-potrait-of-ambedkar/',
  '_score': 32.278206,
  '_source': {'url': 'https://factly.in/photoshopped-image-shared-as-obama-holding-the-potrait-of-ambedkar/',
   'claim': 'Obama holding the portrait of Ambedkar.',
   'content': 'A photo of Barack Obama (Former President of USA) holding the portrait of Ambedkar is being shared widely on social media. Let’s try to check the authenticity of the photo. The archived version of the post can be found here. Claim: Photo of Obama holding the portrait of Ambedkar. Fact: It is a photoshopped image. In the original picture, a set of gloves were present in the place of Ambedkar. Hence the claim made in the post is FALSE. When the image was run through the Yandex Reverse Image Search, many similar images were found in the search results. It was found that the posted image is a cropped version. The original full picture was found on the ‘Flickr’ website w