## 環境準備

In [None]:
!pip install -q boto3 pandas opensearch-py requests_aws4auth python-dotenv

`.env` ファイルに AWS のクレデンシャル情報（`AWS_ACCESS_KEY_ID` と `AWS_SECRET_ACCESS_KEY`）を書き込んでおく。

In [None]:
from dotenv import load_dotenv

# Load environment variables from the .env file; override=True lets values
# from .env replace variables that are already set in the environment.
load_dotenv(override=True)



## OpenSearch インデックスの作成

In [None]:
import boto3
import json
import base64
import pandas as pd
from opensearchpy import OpenSearch, RequestsHttpConnection
from requests_aws4auth import AWS4Auth
from IPython.display import Image, display
import os

# AWS credentials are read from the environment; either value is None when
# the corresponding variable is not set.
AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")

# Bedrock runtime client (us-east-1) used for the embedding model calls.
bedrock_runtime = boto3.client(
    "bedrock-runtime",
    region_name="us-east-1",
    aws_access_key_id=AWS_ACCESS_KEY_ID,
    aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
)

# OpenSearch connection settings.
service = "aoss"  # SigV4 service name; must be set to 'aoss'
region = "us-east-1"
dimensions = 1024  # dimension of the knn_vector field in the index mapping
index_name = "test-index"

# Resolve credentials through boto3 so that the values used for signing are
# the ones boto3 actually resolved. With explicit keys this yields the same
# key pair as before; when the keys are unset boto3 falls back to its default
# credential chain, which may return temporary credentials with a token.
credentials = boto3.Session(
    aws_access_key_id=AWS_ACCESS_KEY_ID,
    aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
).get_credentials()

if credentials is None:
    # Fail early with a clear message instead of signing requests with None.
    raise RuntimeError(
        "AWS credentials could not be resolved; set AWS_ACCESS_KEY_ID and "
        "AWS_SECRET_ACCESS_KEY (e.g. in the .env file)."
    )

# SigV4 signer for OpenSearch requests. The session token is passed along so
# that temporary credentials sign correctly; it is None for long-term keys.
awsauth = AWS4Auth(
    credentials.access_key,
    credentials.secret_key,
    region,
    service,
    session_token=credentials.token,
)

# OpenSearch client definition
client = OpenSearch(
    hosts="https://kgveni0x6hgcq1h8lnjh.us-east-1.aoss.amazonaws.com",
    http_auth=awsauth,
    use_ssl=True,
    verify_certs=True,
    connection_class=RequestsHttpConnection,
    # NOTE(review): "engine" is normally a knn_vector mapping option rather
    # than a client option; kept unchanged -- confirm whether it has any
    # effect when passed to the client constructor.
    engine="faiss",
    timeout=300,
)

# Index definition: k-NN is enabled on the index, "values" holds the
# embedding vector, and "title" / "imagePath" are stored as text fields.
index_body = {
    "settings": {"index.knn": True},
    "mappings": {
        "properties": {
            "values": {"type": "knn_vector", "dimension": dimensions},
            "title": {"type": "text"},
            "imagePath": {"type": "text"},
        }
    },
}

# Create the OpenSearch index only when it does not exist yet.
if not client.indices.exists(index_name):
    client.indices.create(index_name, body=index_body)



## 画像のベクトルをOpenSearch のドキュメントに登録

In [None]:

# Catalogue of images to register; the "title" and "img" columns are read below.
df = pd.read_csv("./img/shoes/data.csv")

for _, row in df.iterrows():
    title, image_path = row["title"], row["img"]

    # Read the image file and base64-encode it for the request payload.
    with open(image_path, "rb") as image_file:
        image_bytes = image_file.read()
    input_image = base64.b64encode(image_bytes).decode("utf8")

    body = json.dumps({"inputText": title, "inputImage": input_image})

    # Call Bedrock to turn the title + image pair into an embedding vector.
    response = bedrock_runtime.invoke_model(
        modelId="amazon.titan-embed-image-v1",
        contentType="application/json",
        accept="application/json",
        body=body,
    )
    response_body = json.loads(response.get("body").read())
    vector_body = response_body.get("embedding")

    # Register the document (vector, title, image path) in OpenSearch.
    vectors = {"values": vector_body, "title": title, "imagePath": image_path}
    response = client.index(index=index_name, body=vectors)

## ドキュメント数が登録した画像の数と同じになるまで、確認しながら待機する。

## ベクトル検索のための関数を定義

In [None]:
def vectorQuery(body, k=1):
    """Embed a query via Bedrock and display the nearest indexed images.

    Args:
        body: JSON string used as the request body for the
            "amazon.titan-embed-image-v1" model. Callers in this notebook
            pass "inputText" and/or "inputImage" keys.
        k: Number of nearest neighbours to retrieve and display.
            Defaults to 1, which matches the previous fixed behaviour.

    Returns:
        None. For every hit the title is printed and the image is displayed.

    Raises:
        ValueError: If the model response contains no "embedding" value.
    """
    # Call Bedrock to obtain the embedding vector for the query.
    query_response = bedrock_runtime.invoke_model(
        body=body,
        modelId="amazon.titan-embed-image-v1",
        accept="application/json",
        contentType="application/json",
    )
    response_body = json.loads(query_response.get("body").read())
    query_vector = response_body.get("embedding")
    if query_vector is None:
        # Fail early with a clear message rather than sending a null vector
        # to OpenSearch.
        raise ValueError("Bedrock response does not contain an 'embedding'")

    # Run the k-NN query against the "values" vector field in OpenSearch.
    search_query = {
        "size": k,
        "query": {
            "knn": {
                "values": {
                    "vector": query_vector,
                    "k": k,
                }
            }
        },
    }
    results = client.search(index=index_name, body=search_query)

    # Show each hit: title first, then the image. print() is called on its
    # own because its None return value must not be handed to display(),
    # which would render a stray "None" after the image.
    for hit in results["hits"]["hits"]:
        source = hit["_source"]
        print(source["title"])
        display(Image(source["imagePath"]))
    

## テキストでのセマンティック検索

In [None]:
# Query text; only "inputText" is sent, so the search is driven by text alone.
query_payload = {"inputText": "フォーマルな場におすすめの靴は？"}
body = json.dumps(query_payload)
vectorQuery(body)




## 画像での検索

In [None]:
# Image used as the query
Image("./img/shoes/query/white-sneakers.png")

In [None]:
# Load the shoe image, base64-encode it, and serialize it under the
# "inputImage" key so the search is driven by the image alone.
with open("./img/shoes/query/white-sneakers.png", "rb") as image_file:
    image_bytes = image_file.read()
input_image = base64.b64encode(image_bytes).decode("utf8")
body = json.dumps({"inputImage": input_image})
vectorQuery(body)


## マルチモーダルセマンティック検索

In [None]:
# Load the shoe image, base64-encode it, and serialize it together with a
# text hint so the query combines "inputImage" and "inputText".
with open("./img/shoes/query/white-sneakers.png", "rb") as image_file:
    image_bytes = image_file.read()
input_image = base64.b64encode(image_bytes).decode("utf8")
body = json.dumps({"inputImage": input_image, "inputText": "これの赤いやつ"})
vectorQuery(body)
