In [162]:
# 必要なライブラリのインストール
%pip install -U weaviate-client python-dotenv


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m23.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [163]:
# 環境変数の読み込み
%load_ext dotenv
%dotenv -o

The dotenv extension is already loaded. To reload it, use:
  %reload_ext dotenv


In [165]:
# クライアントの初期化
import json
import glob
import os
import base64
import weaviate

# Connect to Weaviate. Fail with an explicit message when the host is not
# configured, instead of the opaque TypeError raised by `"http://" + None`.
weaviate_host = os.environ.get("WEAVIATE_HOST")
if not weaviate_host:
    raise RuntimeError(
        "WEAVIATE_HOST is not set; define it (host[:port]) in the environment or .env file"
    )

# Forward the OpenAI key only when one is configured, so that no header with a
# None value is handed to the client.
openai_api_key = os.environ.get("OPENAI_APIKEY")
additional_headers = {"X-OpenAI-Api-Key": openai_api_key} if openai_api_key else None

client = weaviate.Client(
    url="http://" + weaviate_host,
    additional_headers=additional_headers,
)

In [166]:
# Schema definition for the image class.
class_obj = {
    "class": "Cifar10",
    # CIFAR-10 images are 32x32 pixels.
    "description": "Each example is a 32x32 color image, associated with a label from 10 classes.",
    # Objects are vectorized by the img2vec-neural module, configured below to
    # use the "image" blob property.
    "vectorizer": "img2vec-neural",
    "moduleConfig": {
        "img2vec-neural": {
            "imageFields": [
                "image"
            ]
        }
    },
    "properties": [
        {
            # Base64-encoded image data.
            "dataType": [
                "blob"
            ],
            "description": "color image",
            "name": "image"
        },
        {
            "dataType": [
                "text"
            ],
            "description": "file name",
            "name": "fileName"
        }
    ],
}

# The class only needs to be created once. Create it when it is not registered
# yet so that this cell can be re-run safely.
existing_classes = {c["class"] for c in client.schema.get().get("classes") or []}
if class_obj["class"] not in existing_classes:
    client.schema.create_class(class_obj)

In [167]:
# Check that the schema was registered: fetch the stored definition of the
# "Cifar10" class (shown as this cell's output).
client.schema.get("Cifar10")

{'class': 'Cifar10',
 'description': 'Each example is a 28x28 color image, associated with a label from 10 classes.',
 'invertedIndexConfig': {'bm25': {'b': 0.75, 'k1': 1.2},
  'cleanupIntervalSeconds': 60,
  'stopwords': {'additions': None, 'preset': 'en', 'removals': None}},
 'moduleConfig': {'img2vec-neural': {'imageFields': ['image']}},
 'properties': [{'dataType': ['blob'],
   'description': 'color image',
   'moduleConfig': {'img2vec-neural': {}},
   'name': 'image'},
  {'dataType': ['text'],
   'description': 'file name',
   'moduleConfig': {'img2vec-neural': {}},
   'name': 'fileName',
   'tokenization': 'word'}],
 'shardingConfig': {'virtualPerPhysical': 128,
  'desiredCount': 1,
  'actualCount': 1,
  'desiredVirtualCount': 128,
  'actualVirtualCount': 128,
  'key': '_id',
  'strategy': 'hash',
  'function': 'murmur3'},
 'vectorIndexConfig': {'skip': False,
  'cleanupIntervalSeconds': 300,
  'maxConnections': 64,
  'efConstruction': 128,
  'ef': -1,
  'dynamicEfMin': 100,
  'd

In [138]:
def add_image_to_weaviate(image_path, class_name="Cifar10", weaviate_client=None):
    """Upload one image file to Weaviate as an object of ``class_name``.

    The file is read as raw bytes and base64-encoded into the ``image``
    property; the file's base name is stored in ``fileName``.

    Args:
        image_path: Path of the image file to upload.
        class_name: Name of the target Weaviate class. Defaults to "Cifar10".
        weaviate_client: Client used for the upload. When None, the
            notebook-level ``client`` is used.

    Raises:
        OSError: If ``image_path`` cannot be opened or read.
    """
    # Fall back to the notebook's global client so existing calls keep working.
    if weaviate_client is None:
        weaviate_client = client

    # Read the image in binary mode and base64-encode it.
    with open(image_path, "rb") as image_file:
        encoded_image = base64.b64encode(image_file.read()).decode('utf-8')

    # Build the data object to add to Weaviate.
    data_object = {
        "image": encoded_image,
        "fileName": os.path.basename(image_path),
    }

    # Add the object to Weaviate.
    weaviate_client.data_object.create(data_object, class_name)

In [168]:
# Store the images in Weaviate.
# glob returns paths in arbitrary order; sort them so that every run inserts
# the images in the same order.
img_paths = sorted(glob.glob('img/*.png'))
print(f"image count: {len(img_paths)}")
for img_path in img_paths:
    add_image_to_weaviate(img_path)

image count: 102


In [172]:
# Search the "Cifar10" class for stored images near a query image.
near_img_path = "test/39_dog.png"  # path of the query image
certainty = 0.90                   # certainty value passed in the near-image filter

near_image_filter = {"image": near_img_path, "certainty": certainty}
similar_images_query = client.query.get("Cifar10", ["image", "fileName"])
similar_images_query = similar_images_query.with_near_image(near_image_filter)
result = similar_images_query.do()

# Print the raw response as indented JSON.
print(json.dumps(result, indent=2))

{
  "data": {
    "Get": {
      "Cifar10": [
        {
          "fileName": "83_dog.png",
          "image": "iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAIAAAD8GO2jAAALxElEQVR4XmMMn3ieg4eP4f/////+/f/9l4fzvwgfk5oMLw8H469f3//9Y/j399/vX7+Zmf6zsTGJCPLw83Bwc7KwMjMzMTD//PHv5bsfD198ef7p77sfDD8Y2X4BFf/5/e/vXzYWVm5Ontf3LrD8+8Pw989/RkagFUz/mRi+/Pz7591fln8fRPiY+XnY2Jj/Mv39+u3Di49f3v/7/YNPQ5WNU5KTkY+bnZWLlZWF57+sILOWHMfjd1/vv/5++9mPd1+ZvjCw/GH+x8TMwMjE9J+BgeXvn79/f/9hANrAyAC05d9/ph//GB+//f763Q/O/985/71i/PHi89uHP79++vf3z68PTz4qqSgoqUpKSHIJCTMwsbAxsbCy/GcU+M/859fXd2/+fmP5/YeLgZmJiYGJgeHPv/9/Wf78+cP08zcTE9A+RkZGpr/////+///XX+YPHz/8enGN8ftdhu/P/v76wsL8n4ODnZubi5uHW0BIiI+Pn5tXgI2FGWjpjx9fP3189/X9S7bvD3n/sP9kEGRklPzLwP7/H8Pf3/+AQfTvzy+QD4CmMzH9+8/A+P/vv3/fv3x/9fDjw+Ocfz+yMnwHeo2Ji5OFhe3Hzx+fvnz49PXdu89CzJzcnGxsP75+fv/2xZePbz9/fPP749ufH399/8bGJMTwn4n51y+e33+Aof3r19//jAz/gODv3z+///wGop8Mfz79+fTs7+/vX358Y/n/i5Ob4+u3n1++/Xj17uPnH384+AQ5eES4eASAij99eP/s2fMnj+9+fP+KlZHh/cev77/8Z2cQY+Tn/cXG8u/fX5bvnz4Bg+f3988/v3388eXD

In [170]:
# Search the "Cifar10" class for stored images near a query image.
near_img_path = "test/28_truck.png"  # path of the query image
certainty = 0.88                     # certainty value passed in the near-image filter

near_image_filter = {"image": near_img_path, "certainty": certainty}
similar_images_query = client.query.get("Cifar10", ["image", "fileName"])
similar_images_query = similar_images_query.with_near_image(near_image_filter)
result = similar_images_query.do()

# Print the raw response as indented JSON.
print(json.dumps(result, indent=2))

{
  "data": {
    "Get": {
      "Cifar10": [
        {
          "fileName": "16_truck.png",
          "image": "iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAIAAAD8GO2jAAALEklEQVR4XmN8/eYdAwMDEwwwAgET03/m/0BBEJuBgfk/IyMDFPyHsf4BwV+G/2AAZAKlwUwIDRQHyf/68/fPv/8sf/78ARr0HyYPtIjh37//DCCjICb/Z2D8DzYfbCcDzCygBkYgBpoEJKEGMzDA2SAb/v3/95eRBeJSZmZmKIOJmZEJ6M5/jBD3M4CcD9EGFAVaCzIYJMbIAIRgRXBD/8MA0DSIt4BqWIBOZgSHASiQmJmApjMxAfkgghFmENyZEENBdgAdy8QIsQxuLtwfQAYz2BJg+DIxMDL/Y2AChgMjEwNQCwMj0O3/mYGijExAuf///gFVs7GxAa0HufTff2B4MTFCvPefCeQJIMkIiUFGmK8hkcrGyszGwsTyB2QEKBYYgQxGoH5gUANDghnIZWZhYWJm/vTl87Nnz0RERHh5eVlYWIDW/Pv7FxwssEhhAnoHZAckJfwFy4K89e//n9+/WBhYmH///v0fGCF/GEB+YQLGMQMT0HI2ts+fP12/cePw4cN37tyRlJRUUVFRU1NTVFQUFBQEOvnXr18Qs0CJBuQwaFBBAgoYqkAFp8+cZtxz7IS0tDQ7MBCAKZIJFHRADR/evrp54/qtW7c+fPjIz8fHz88PdMTHjx9//PjBxcUlJyenpKQE1CUgIMDKygr0/u8//+AO/wdJQP////796+q1K8yvPnwUEhSUlZUFqgAmT6BXb966tXrVyidPnoiKienoaCvIyQMNEhYRAXpCWFgY6E6g1I0bN4DW379///v376ysbJxcXEA//QUm+b9/ISkCFIaM/0VFRZm5+QSevXghKC7JLyTKwMzEwML09+8vxj