# Insert Images into Weaviate

Read all images in `DATA_DIR`, resize them to a fixed square size (`IMAGE_DIM`), and insert them into the Weaviate vector database using the `img2vec-neural` vectorizer.

In [1]:
# !pip install ipyplot weaviate-client opencv-python

In [2]:
import os
import cv2
import uuid
import base64
import ipyplot
import weaviate

In [3]:
# Directory whose entries are listed and uploaded by insert_data().
DATA_DIR = "data/stamps"
# Target size handed to cv2.resize as `dsize`, i.e. (width, height) in pixels.
IMAGE_DIM = (100, 100)

In [4]:
def clear_all(client):
    """Wipe the whole schema of the connected Weaviate instance.

    Args:
        client: Object exposing ``schema.delete_all()``.
    """
    schema_api = client.schema
    schema_api.delete_all()


def define_schema(client):
    """Register the ``Stamp`` class in the Weaviate schema.

    The class has two properties: ``image`` (a ``blob``) and ``path``
    (a ``string``). It is configured with the ``img2vec-neural`` vectorizer
    reading the ``image`` field, and an ``hnsw`` vector index.

    Args:
        client: Object exposing ``schema.create_class(class_definition)``.
    """
    image_property = {
        "dataType": ["blob"],
        "description": "Image",
        "name": "image",
    }
    path_property = {
        "dataType": ["string"],
        "description": "",
        "name": "path",
    }

    stamp_class = {
        "class": "Stamp",
        "description": "",
        "moduleConfig": {
            "img2vec-neural": {"imageFields": ["image"]},
        },
        "properties": [image_property, path_property],
        "vectorIndexType": "hnsw",
        "vectorizer": "img2vec-neural",
    }

    client.schema.create_class(stamp_class)

In [5]:
def prepare_image(file_path: str):
    """Read an image from disk and resize it to ``IMAGE_DIM``.

    Args:
        file_path: Path of the image file to load.

    Returns:
        The resized image array produced by ``cv2.resize`` (linear
        interpolation), with the channels as loaded by ``cv2.imread``.

    Raises:
        ValueError: If OpenCV cannot read or decode ``file_path``.
    """
    img = cv2.imread(file_path)
    # cv2.imread does not raise on a missing/unsupported file: it returns
    # None, which would otherwise surface as an opaque assertion failure
    # inside cv2.resize. Fail early with a message naming the file instead.
    if img is None:
        raise ValueError(f"Could not read image file: {file_path!r}")
    return cv2.resize(img, IMAGE_DIM, interpolation=cv2.INTER_LINEAR)

In [6]:
# Connect to the Weaviate instance expected at localhost:8080.
client = weaviate.Client("http://localhost:8080")

# WARNING: destructive. clear_all() calls client.schema.delete_all(), which
# removes the existing schema (and deletes all associated data).
clear_all(client)

# Create the "Stamp" class that the images are inserted into.
define_schema(client)

In [7]:
def insert_data(client):
    """Upload every image file found in ``DATA_DIR`` to the ``Stamp`` class.

    Each file is loaded and resized by ``prepare_image``, re-encoded as JPEG,
    base64-encoded and stored in the ``image`` property; the bare file name
    is stored in ``path``. One line per created object is printed with the
    file path and the value returned by ``client.data_object.create``.

    Args:
        client: Weaviate client exposing ``data_object.create``.

    Raises:
        ValueError: If OpenCV fails to encode an image as JPEG.
    """
    for fn in os.listdir(DATA_DIR):
        fp = os.path.join(DATA_DIR, fn)
        # os.listdir also returns sub-directories (e.g. Jupyter's
        # .ipynb_checkpoints); they are not images and would crash the
        # loop, so only regular files are processed.
        if not os.path.isfile(fp):
            continue

        img = prepare_image(fp)

        # cv2.imencode returns (success_flag, encoded_buffer); check the flag
        # rather than uploading a possibly empty/invalid buffer.
        encoded_ok, jpg_buffer = cv2.imencode('.jpg', img)
        if not encoded_ok:
            raise ValueError(f"Could not encode image as JPEG: {fp!r}")

        b64_string = base64.b64encode(jpg_buffer).decode('utf-8')
        data_properties = {
            "path": fn,
            "image": b64_string,
        }
        # A fresh random UUID is generated for every object.
        r = client.data_object.create(data_properties, "Stamp", str(uuid.uuid4()))
        print(fp, r)

In [8]:
# Upload the images; insert_data() prints the file path and the value
# returned by client.data_object.create for each created object.
insert_data(client)

data/stamps/Skateboard_2004_GF.jpg a79508ce-5e9d-4549-a962-5654f51e59af
data/stamps/Signac_2003_GF.jpg f37036ca-8663-453b-a01a-645bc7cf9814
data/stamps/Chasseur_2004_GF.jpg 321d6a40-dc5c-479e-b478-c192aae7e353
data/stamps/Parapente_2004_GF.jpg ec31cfd0-30b6-491b-9a71-cd7b7cacc103
data/stamps/Course_landaise_GF.jpg 42e13189-e725-4843-b6ab-27747ec1d86f
data/stamps/Jacqueline_Auriol_GF.jpg b72f50bd-5096-4542-952b-5f4431b576ad
data/stamps/Franc_maconnerie_2003_GF.jpg 703f435f-61a9-446e-9846-5ab4c409ea15
data/stamps/Ecoliere_GF.jpg 11b5fbfa-08e6-478e-971a-570f73d7fa43
data/stamps/Vacances_2003_GF.jpg eb61f4ba-7520-449d-a74f-9dde9b305668
data/stamps/De_Gaulle_2019.jpg 995a5640-91b1-4080-9843-e175a0386b37
data/stamps/Marche_Provence_GF.jpg 0e1e25b3-aa9b-4938-9eb3-f7f6ad587b24
data/stamps/Nu_Despiau_2019.jpg 1cb10b22-9cbf-4772-a42c-531687cc6b2f
data/stamps/Tissu_africain_10_2019.jpg a933d7f8-43bc-4a27-97a5-09793d192cb0
data/stamps/Styles_Arc_Senans_2019.jpg 30eb5324-c7ec-41b1-a0a7-a4c4e9cf3816

data/stamps/Gavroche_2003_GF.jpg 7d09e3c4-f688-44da-a090-af6ca9d4352d
data/stamps/UNESCO_075_2003_GF.jpg 47f1f7d5-d08b-4a2f-ac57-ae342825bdb4
data/stamps/Styles_Chambord_2019.jpg d3730ec3-1bed-44e1-b3da-f4c18ffe6529
data/stamps/Helsinki_Suomenlinna_2019.jpg 64915b28-659b-4965-b1a2-93d69496994b
data/stamps/Conseil_Europe_050_2003_GF.jpg 4fcd235a-f0f7-446c-99f0-642f5efec55b
data/stamps/Emilie_Chatelet_2019.jpg 81f35300-a089-451d-81c0-eca0db619a7a
data/stamps/Claudine_2003_GF.jpg 058a0fcb-32ab-4f65-ba86-3eb54da0632f
data/stamps/Le_pain_GF.jpg c5570c98-0c44-44f9-8394-b3fafd809756
data/stamps/Nu_Egyptienne_2019.jpg f1b2a781-0986-402a-9b81-29cb8d53c918
data/stamps/Grenadier_2004_GF.jpg c844cee1-1cbb-4bcd-9bac-073f6a03a81f
data/stamps/Citroen_Traction_cabriolet_2019.jpg 061a0708-a63b-48ae-b07b-245513a578eb
data/stamps/Mont_Blanc_2003_GF.jpg 11c7793b-5c6a-4df7-9951-0ba115e11675
data/stamps/Saint_Pere_2003_GF.jpg bce9c6c4-4b49-480a-a5cb-3241754361f2
data/stamps/Styles_St_Nectaire_2019.jpg 2bea4

In [9]:
# Preview window over the directory listing: `nb_images` entries starting
# at position `offset`.
nb_images = 12
offset = 0

# Build the full list of paths first, then keep only the requested window.
image_list = [os.path.join(DATA_DIR, entry) for entry in os.listdir(DATA_DIR)]
image_list = image_list[offset:offset + nb_images]

ipyplot.plot_images(image_list, show_url=False)