In [45]:
!pip install tensorflow-io
!pip install elasticsearch==8.4.3

Collecting elasticsearch==8.4.3
  Using cached elasticsearch-8.4.3-py3-none-any.whl (384 kB)
Installing collected packages: elasticsearch
  Attempting uninstall: elasticsearch
    Found existing installation: elasticsearch 7.17.5
    Uninstalling elasticsearch-7.17.5:
      Successfully uninstalled elasticsearch-7.17.5
Successfully installed elasticsearch-8.4.3


In [1]:
import os
import time
from sklearn.model_selection import train_test_split
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.layers.experimental import preprocessing
import tensorflow_io as tfio
import json
from tqdm import tqdm
import pickle

In [25]:
df = pd.read_csv('./data/extracted_features_sub_dataset_4_reducted.csv')

In [26]:
df.shape

(6096, 518)

In [5]:
ES_NODES = "http://localhost:9200"
index = 'open-images'

es_client = Elasticsearch(hosts = [ES_NODES])

In [48]:
res = es_client.indices.delete(index=index)
print("Response from server: {}".format(res))

ConnectionError: ConnectionError(<urllib3.connection.HTTPConnection object at 0x000001624FC2D400>: Failed to establish a new connection: [WinError 10061] Aucune connexion n’a pu être établie car l’ordinateur cible l’a expressément refusée) caused by: NewConnectionError(<urllib3.connection.HTTPConnection object at 0x000001624FC2D400>: Failed to establish a new connection: [WinError 10061] Aucune connexion n’a pu être établie car l’ordinateur cible l’a expressément refusée)

In [51]:
settings = {
  "settings": {
    "elastiknn": True,
    "number_of_shards": 1,
    "number_of_replicas": 0
  }
}

mapping = {
  "dynamic": False,
  "properties": {
      "ImageID": { "type": "keyword" },
      "featureVec": {
          "type": "elastiknn_dense_float_vector",
          "elastiknn": {
            "dims": 512,
            "model": "lsh",
            "similarity": "l2",
            "L": 99,
            "k": 3,
            "w": 2
          }
    },
    "Title": { "type": "text" },
    "AuthorID": { "type": "text","index":False},
    "Title": { "type": "text" },
    "Tags": { "type": "text" },
    "OriginalURL":{"type":"text","index":False}
  }
}

print("creating the '{}' index.".format(index))
res = es_client.indices.create(index=index, body=settings)
print("Response from server: {}".format(res))

# es_client.indices.put_settings(json.dumps(settings), index)
es_client.indices.put_mapping(json.dumps(mapping), index)
es_client.indices.get_mapping(index)

creating the 'open-images' index.


  res = es_client.indices.create(index=index, body=settings)


Response from server: {'acknowledged': True, 'shards_acknowledged': True, 'index': 'open-images'}


  es_client.indices.put_mapping(json.dumps(mapping), index)
  es_client.indices.get_mapping(index)


{'open-images': {'mappings': {'dynamic': 'false',
   'properties': {'AuthorID': {'type': 'text', 'index': False},
    'ImageID': {'type': 'keyword'},
    'OriginalURL': {'type': 'text', 'index': False},
    'Tags': {'type': 'text'},
    'Title': {'type': 'text'},
    'featureVec': {'type': 'elastiknn_dense_float_vector',
     'elastiknn': {'L': 99,
      'dims': 512,
      'k': 3,
      'model': 'lsh',
      'similarity': 'l2',
      'w': 2}}}}}}

In [27]:
records = df.to_dict(orient="records")

In [28]:
def image_infos():
    for record in tqdm(records):
        yield {
          "_op_type": "index",
          "_index": index, 
          "_id": record["ImageID"], 
          "ImageID":  record["ImageID"],
            "featureVec": [record['f {}'.format(i+1)] for i in range(512) ],
          "Title": record["Title"],
          "AuthorID": record["AuthorID"],
          "Tags": record["tags"],
          "OriginalURL":record["OriginalURL"]

        }



In [29]:
bulk(es_client, image_infos(), chunk_size=2000, max_retries=2)

  bulk(es_client, image_infos(), chunk_size=2000, max_retries=2)
100%|█████████████████████████████████████████████████████████████████████████████| 6096/6096 [00:10<00:00, 568.95it/s]


(6096, [])

In [30]:
es_client.indices.refresh(index=index)
es_client.indices.forcemerge(index=index, max_num_segments=1, request_timeout=300)

  es_client.indices.refresh(index=index)
  es_client.indices.forcemerge(index=index, max_num_segments=1, request_timeout=300)
  es_client.indices.forcemerge(index=index, max_num_segments=1, request_timeout=300)


ObjectApiResponse({'_shards': {'total': 1, 'successful': 1, 'failed': 0}})

In [12]:
source_no_vecs = ["ImageID", "Title", "AuthorID", "Tags", "OriginalURL"]

In [10]:
def search_by_query(q,size=5):
    body = {"query" : {
        "multi_match": {
          "query": q,
          "fields": ["Title", "Tags"]
        }
      }}

    res = es_client.search(index=index, body = body, size=5, _source=source_no_vecs)
    return res

In [31]:
search_by_query("food")

  res = es_client.search(index=index, body = body, size=5, _source=source_no_vecs)
  res = es_client.search(index=index, body = body, size=5, _source=source_no_vecs)


ObjectApiResponse({'took': 58, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 10000, 'relation': 'gte'}, 'max_score': 9.4137, 'hits': [{'_index': 'open-images', '_type': '_doc', '_id': '43ba6432f8f41bed', '_score': 9.4137, '_source': {'OriginalURL': 'https://c6.staticflickr.com/5/4044/4246129716_059d40cbdd_o.jpg', 'AuthorID': 'smanography', 'ImageID': '43ba6432f8f41bed', 'Title': 'Food, Glorious food', 'Tags': 'Fast food, Dessert, Food, Baked goods'}}, {'_index': 'open-images', '_type': '_doc', '_id': '42f18e310686dd08', '_score': 9.4137, '_source': {'OriginalURL': 'https://c1.staticflickr.com/5/4048/4301409950_583f48173b_o.jpg', 'AuthorID': 'yogendra174', 'ImageID': '42f18e310686dd08', 'Title': 'Food.. Glorious Food', 'Tags': 'Plant, Flower'}}, {'_index': 'open-images', '_type': '_doc', '_id': 'd45c47119a247f1e', '_score': 9.178661, '_source': {'OriginalURL': 'https://c7.staticflickr.com/4/3463/3872424562_c3a6713390

In [14]:
def search_by_image_query(feature_vector,size=5):
    query = {"query" : {
            "elastiknn_nearest_neighbors": {
                "field": "featureVec",
              "vec": {"values": feature_vector},
              
              "model": "exact",
              "similarity": "l2",
              "candidates": 150
            }
          }
          }
    
    return es_client.search(index=index, body = query, _source=source_no_vecs)


In [15]:
from feature_extractor import FeatureExtractor
import matplotlib.pyplot as plt
from PIL import Image
from io import BytesIO, StringIO
import requests


In [16]:
loaded_model = pickle.load(open("./data/pca_model.pkl", 'rb'))

fe = FeatureExtractor()



In [17]:
img = Image.open(BytesIO(requests.get("https://img.freepik.com/photos-gratuite/prise-vue-au-grand-angle-seul-arbre-poussant-sous-ciel-assombri-pendant-coucher-soleil-entoure-herbe_181624-22807.jpg?w=2000").content))

In [18]:
final = loaded_model.transform(fe.extract(img).reshape(1,-1))

In [19]:
search_by_image_query(final[0].tolist())

  return es_client.search(index=index, body = query, _source=source_no_vecs)
  return es_client.search(index=index, body = query, _source=source_no_vecs)


ObjectApiResponse({'took': 1186, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 10000, 'relation': 'gte'}, 'max_score': 0.55774045, 'hits': [{'_index': 'open-images', '_type': '_doc', '_id': '9a7801751269de88', '_score': 0.55774045, '_source': {'OriginalURL': 'https://c1.staticflickr.com/7/6191/6104916230_d8405be688_o.jpg', 'AuthorID': 'jgiuliano', 'ImageID': '9a7801751269de88', 'Title': 'Imagine waking up to this in the morning?', 'Tags': 'Food, Plant, Flower, Tree, Vegetable'}}, {'_index': 'open-images', '_type': '_doc', '_id': 'e869032547f65662', '_score': 0.55581534, '_source': {'OriginalURL': 'https://farm3.staticflickr.com/8336/8378373791_7510bafdf7_o.jpg', 'AuthorID': 'eduardorobles', 'ImageID': 'e869032547f65662', 'Title': 'La tarde (04)', 'Tags': 'Plant, Tree'}}, {'_index': 'open-images', '_type': '_doc', '_id': '2875eda0c2616e93', '_score': 0.5536822, '_source': {'OriginalURL': 'https://farm4.staticflickr.c