In [1]:
import weaviate
import json
client = weaviate.Client('http://localhost:8080')
schema = client.schema.get()
print(schema)

{'classes': []}


In [2]:
schema = {
    "classes": [
        {
            "class": "Dog",
            "description": "Images of different dogs",
            "moduleConfig": {
                "img2vec-neural": {
                    "imageFields": [
                        "image"
                    ]
                }
            },
            "vectorIndexType": "hnsw", 
            "vectorizer": "img2vec-neural", # the img2vec-neural Weaviate module
            "properties": [
                {
                    "name": "breed",
                    "dataType": ["string"],
                    "description": "name of dog breed",
                },
                {
                    "name": "image",
                    "dataType": ["blob"],
                    "description": "image",
                },
                {
                    "name": "filepath",
                    "dataType":["string"],
                    "description": "filepath of the images",
                }
            ]
        }
    ]
}

# adding the schema 
client.schema.create(schema)

In [3]:
schema = client.schema.get()
display(schema)

{'classes': [{'class': 'Dog',
   'description': 'Images of different dogs',
   'invertedIndexConfig': {'bm25': {'b': 0.75, 'k1': 1.2},
    'cleanupIntervalSeconds': 60,
    'stopwords': {'additions': None, 'preset': 'en', 'removals': None}},
   'moduleConfig': {'img2vec-neural': {'imageFields': ['image']}},
   'properties': [{'dataType': ['string'],
     'description': 'name of dog breed',
     'moduleConfig': {'img2vec-neural': {}},
     'name': 'breed',
     'tokenization': 'word'},
    {'dataType': ['blob'],
     'description': 'image',
     'moduleConfig': {'img2vec-neural': {}},
     'name': 'image'},
    {'dataType': ['string'],
     'description': 'filepath of the images',
     'moduleConfig': {'img2vec-neural': {}},
     'name': 'filepath',
     'tokenization': 'word'}],
   'replicationConfig': {'factor': 1},
   'shardingConfig': {'virtualPerPhysical': 128,
    'desiredCount': 1,
    'actualCount': 1,
    'desiredVirtualCount': 128,
    'actualVirtualCount': 128,
    'key': '_i

In [4]:
from pathlib import Path
import base64


img_path = Path("./flask-app/static/img/")
encoded_path = Path("base64_images")
encoded_path.mkdir(exist_ok=True)

for file_path in img_path.iterdir():
    with file_path.open("rb") as f:
        encoded = base64.b64encode(f.read())
    with (encoded_path / file_path.stem).open("wb+") as f:
        f.write(encoded)


print("The images have been converted to base64.")

The images have been converted to base64.


In [5]:
client.batch.configure(
    batch_size=100, 
    dynamic=True,
    timeout_retries=3,
    callback=None,
)
    
with client.batch as batch:
    batch.delete_objects(
        class_name="Dog",
        # same where operator as in the GraphQL API
        where={
            "operator": "NotEqual",
            "path": ["breed"],
            "valueString": "x"
        },
        output="verbose",
    )

encoded_path = Path("base64_images")
with client.batch as batch:
    # Iterate over all files in the base64_images folder
    for encoded_file_path in encoded_path.iterdir():
        with encoded_file_path.open() as f:
            file = f.read()

        base64_encoding = file.replace("\n", "").replace(" ", "") 

        # The properties from our schema
        data_properties = {
            "breed": encoded_file_path.stem,
            "image": base64_encoding,
            "filepath": encoded_file_path.stem + ".jpg",
        }

        batch.add_data_object(data_properties, "Dog")

print("The objects have been uploaded to Weaviate.")

The objects have been uploaded to Weaviate.


In [6]:
client.query.get(
    class_name="Dog",
    properties=["filepath", "breed", "image"]
).do()


{'data': {'Get': {'Dog': [{'breed': 'Labrador-Retriever',
     'filepath': 'Labrador-Retriever.jpg',
     'image': '/9j/4AAQSkZJRgABAQEAYABgAAD/7QA4UGhvdG9zaG9wIDMuMAA4QklNBAQAAAAAAAA4QklNBCUAAAAAABDUHYzZjwCyBOmACZjs+EJ+/+EAjEV4aWYAAE1NACoAAAAIAAUBEgADAAAAAQABAAABGgAFAAAAAQAAAEoBGwAFAAAAAQAAAFIBKAADAAAAAQACAACHaQAEAAAAAQAAAFoAAAAAAAAAYAAAAAEAAABgAAAAAQADoAEAAwAAAAEAAQAAoAIABAAAAAEAAAMgoAMABAAAAAEAAAIVAAAAAP/bAEMACAYGBwYFCAcHBwkJCAoMFA0MCwsMGRITDxQdGh8eHRocHCAkLicgIiwjHBwoNyksMDE0NDQfJzk9ODI8LjM0Mv/bAEMBCQkJDAsMGA0NGDIhHCEyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMv/AABEIAhUDIAMBIgACEQEDEQH/xAAcAAACAgMBAQAAAAAAAAAAAAADBAIFAAEGBwj/xAA6EAABBAIBAwIFAwIFAwQDAQABAAIDEQQhMQUSQVFhBhMicYEUkaEysQcVI8HRQuHwFiQz8SVSYkP/xAAaAQADAQEBAQAAAAAAAAAAAAABAgMABAUG/8QALhEAAgICAgICAQQBBAIDAAAAAAECEQMhEjEEQRNRIgUyYYFxFCNCoZHwM7HB/9oADAMBAAIRAxEAPwDzib+EoQe4C/KfmaCaCAyBz3UAvEg0kVb2QDDdeVLtIFH7KxjwrbZ/CHJiEnRSrKroslqxaCIjjR9VZ42MZDvmvCXjjLXAEeeVb4oDaGtKGbI6tDKCDY+GCNi6TD8QFlAEJyAN+XYHKKe0

In [7]:
client.data_object.get(with_vector=True)

{'deprecations': None,
 'objects': [{'class': 'Dog',
   'creationTimeUnix': 1681471259044,
   'id': '01e3c2a2-7d4d-41e6-a19b-5ceb57dc7190',
   'lastUpdateTimeUnix': 1681471259044,
   'properties': {'breed': 'Labrador-Retriever',
    'filepath': 'Labrador-Retriever.jpg',
    'image': '/9j/4AAQSkZJRgABAQEAYABgAAD/7QA4UGhvdG9zaG9wIDMuMAA4QklNBAQAAAAAAAA4QklNBCUAAAAAABDUHYzZjwCyBOmACZjs+EJ+/+EAjEV4aWYAAE1NACoAAAAIAAUBEgADAAAAAQABAAABGgAFAAAAAQAAAEoBGwAFAAAAAQAAAFIBKAADAAAAAQACAACHaQAEAAAAAQAAAFoAAAAAAAAAYAAAAAEAAABgAAAAAQADoAEAAwAAAAEAAQAAoAIABAAAAAEAAAMgoAMABAAAAAEAAAIVAAAAAP/bAEMACAYGBwYFCAcHBwkJCAoMFA0MCwsMGRITDxQdGh8eHRocHCAkLicgIiwjHBwoNyksMDE0NDQfJzk9ODI8LjM0Mv/bAEMBCQkJDAsMGA0NGDIhHCEyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMv/AABEIAhUDIAMBIgACEQEDEQH/xAAcAAACAgMBAQAAAAAAAAAAAAADBAIFAAEGBwj/xAA6EAABBAIBAwIFAwIFAwQDAQABAAIDEQQhMQUSQVFhBhMicYEUkaEysQcVI8HRQuHwFiQz8SVSYkP/xAAaAQADAQEBAQAAAAAAAAAAAAABAgMABAUG/8QALhEAAgICAgICAQQBBAIDAAAAAAECEQMhEjEEQRNRIgUyYYFxFCN