In [43]:
import numpy as np
import pandas as pd
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk
import os
from img2vec_pytorch import Img2Vec
from PIL import Image
import xml.etree.ElementTree as ET

In [2]:
# Connection settings are read from the environment so that credentials do not
# have to live in the notebook. The fallbacks reproduce the values this cell
# used before, so an unconfigured local run behaves exactly as it did.
# TODO: drop the password fallback once ELASTICSEARCH_PASSWORD is set everywhere.
ES_HOST = os.environ.get("ELASTICSEARCH_HOST", "http://localhost:9200")
ES_USER = os.environ.get("ELASTICSEARCH_USER", "elastic")
ES_PASSWORD = os.environ.get("ELASTICSEARCH_PASSWORD", "master")

# Client used by the bulk-indexing cell further down.
elastic_client = Elasticsearch(hosts=[ES_HOST],
                               basic_auth=(ES_USER, ES_PASSWORD))
# Shared Img2Vec instance; generate_embedding() calls its get_vec() method.
model = Img2Vec()



In [54]:
def generate_embedding(segment):
    """Embed one image segment and return the vector as a plain Python list.

    Args:
        segment: The image (or crop) to embed. It is forwarded unchanged to
            the module-level ``model.get_vec``; the indexing loop in this
            notebook passes a PIL crop of an RGB image.

    Returns:
        Whatever ``tolist()`` yields for the vector produced by the model
        (presumably a flat list of floats -- TODO confirm against Img2Vec).
    """
    return model.get_vec(segment).tolist()

In [55]:
def build_doc(no:str, name:str, classes, vector):
    """Assemble the Elasticsearch document body for one image segment.

    Args:
        no: Identifier stored under "no". Annotated as ``str``, although the
            indexing loop in this notebook passes an integer counter; the
            value is stored as given.
        name: Stored under "name" (the loop passes the image file name).
        classes: Stored under "class_label".
        vector: Stored under "embedding".

    Returns:
        A new dict with the keys "no", "name", "class_label" and "embedding".
    """
    return {
        "no": no,
        "name": name,
        "class_label": classes,
        "embedding": vector,
    }

In [56]:
def get_files(path:str, extension:str):
    """List the entries of a directory whose names end with a given suffix.

    Args:
        path: Directory to scan (not recursive).
        extension: Suffix to match, e.g. ".jpg". The comparison is
            case-sensitive because it uses ``str.endswith``.

    Returns:
        The matching entry names (without the directory part), sorted
        alphabetically. ``os.listdir`` returns names in arbitrary order, so
        sorting keeps any numbering derived from this list reproducible
        between runs and machines.

    Raises:
        OSError: If ``path`` cannot be listed (propagated from ``os.listdir``).
    """
    return sorted(entry for entry in os.listdir(path) if entry.endswith(extension))

In [59]:
# Example of the list returned by get_classes_segments (one dict per bounding box):
# [
#     {'class': 'd10', 'segment_values': [100, 100, 100, 100]},
#     {'class': 'd40', 'segment_values': [100, 100, 100, 100]}
# ]

def get_classes_segments(path:str, file_name:str):
    """Parse one annotation XML file into a list of labelled bounding boxes.

    Args:
        path: Directory containing the annotation file. A trailing path
            separator is optional because the two parts are joined with
            ``os.path.join``.
        file_name: Name of the XML file inside ``path``.

    Returns:
        A list with one dict per ``<bndbox>`` found inside an ``<object>``
        element that is a direct child of the XML root::

            {'class': <text of the object's <name>>, 'segment_values': [int, ...]}

        ``segment_values`` holds the integer value of every child of the
        ``<bndbox>`` in document order; no reordering by tag name is done.
        ``'class'`` is ``None`` when the object has no ``<name>`` child.

    Raises:
        OSError: If the file cannot be opened.
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
        ValueError: If a ``<bndbox>`` child does not contain integer text.
    """
    root = ET.parse(os.path.join(path, file_name)).getroot()
    segments = []
    for obj in root.findall('object'):
        # Look the label up per object so it neither depends on <name>
        # appearing before <bndbox> nor leaks over from a previous object.
        label = obj.findtext('name')
        for bndbox in obj.findall('bndbox'):
            coordinates = [int(coord.text) for coord in bndbox]
            segments.append({'class': label, 'segment_values': coordinates})
    return segments

In [60]:
# Directory scanned for the .jpg images and their same-named .xml annotation
# files (note the trailing path separator).
dataset_path = "./data/road/train/"
# Name of the Elasticsearch index that receives one document per image segment.
index_name = "road_image_segment"
# Optional smoke test: parse a single annotation file and inspect the result.
#segment = get_classes_segments(dataset_path, "4_png_jpg.rf.b3f95d13379e2606209b18e200655512.xml")
#print(segment)

In [65]:
# Index every annotated segment of every .jpg under dataset_path.
#
# For each image the same-named .xml annotation is parsed, every bounding box
# is cropped out of the image, embedded, and queued as a bulk action. The queue
# is sent to Elasticsearch every BULK_BATCH_SIZE segments and once more at the
# end for the remainder.
BULK_BATCH_SIZE = 50
target_index = index_name.lower()  # loop-invariant, so computed once

# NOTE: `id` shadows the built-in of the same name. The name is kept so that
# any other cell reading this counter keeps working.
id = 0
file_id = 0
actions = []
file_list = get_files(dataset_path, ".jpg")
for file in file_list:
    file_id += 1
    image_file = os.path.join(dataset_path, file)
    # The annotation shares the image's base name; splitext avoids assuming
    # that the extension is exactly four characters long.
    xml_file = os.path.splitext(file)[0] + ".xml"
    classes_segments = get_classes_segments(dataset_path, xml_file)
    # The context manager releases the file handle even if convert() raises;
    # the converted copy stays usable after the block.
    with Image.open(image_file) as raw_image:
        image = raw_image.convert("RGB")
    print("Image: ", file_id, " -> ", image_file)
    for class_segment in classes_segments:
        id += 1
        class_ = class_segment.get("class")
        values = class_segment.get("segment_values")
        # The values arrive in the child order of <bndbox> in the XML. This
        # unpacking assumes that order is xmin, xmax, ymin, ymax -- TODO confirm
        # against the annotation files.
        xmin, xmax, ymin, ymax = values[:4]
        # PIL's crop box is (left, upper, right, lower).
        segment = image.crop((xmin, ymin, xmax, ymax))
        vector = generate_embedding(segment)
        actions.append({"_index": target_index,
                        "_source": build_doc(id, file, class_, vector)})
        if id % BULK_BATCH_SIZE == 0:
            bulk(elastic_client, actions)
            actions.clear()
            print("{} segments were indexed!!!".format(id))

# Flush whatever is left over after the last full batch.
if actions:
    bulk(elastic_client, actions)
    actions.clear()
    print("{} segments were indexed!!!".format(id))
print("Indexing was finished!!!")


Image:  1  ->  ./data/road/train/101_png_jpg.rf.3f4b3929e30f8638a51b9b0f38d0f0f1.jpg
Image:  2  ->  ./data/road/train/101_png_jpg.rf.a919969335ca2d75771416c572535b26.jpg
Image:  3  ->  ./data/road/train/102_png_jpg.rf.81d2aa480dde442a7fef54a0dac75833.jpg
Image:  4  ->  ./data/road/train/102_png_jpg.rf.e8790d34c94893497d54353ea75e5949.jpg
Image:  5  ->  ./data/road/train/104_png_jpg.rf.0ec4a38881c7d925f3f5b305f7add5e1.jpg
Image:  6  ->  ./data/road/train/104_png_jpg.rf.37729016cd10536d505d32efec43c262.jpg
Image:  7  ->  ./data/road/train/105_png_jpg.rf.0689f3b6985ec5395c1417d48a78fd50.jpg
Image:  8  ->  ./data/road/train/105_png_jpg.rf.8140976580a1196d9e0d5c42e7bcf17e.jpg
Image:  9  ->  ./data/road/train/106_png_jpg.rf.303f44c8479ad803c3d97fe9382f02c7.jpg
50 segments were indexed!!!
Image:  10  ->  ./data/road/train/106_png_jpg.rf.e96a6ad34a364ca818a47490042cc646.jpg
Image:  11  ->  ./data/road/train/107_png_jpg.rf.75b69b31cf39700d7c56c499281de6a4.jpg
Image:  12  ->  ./data/road/train/1