In [1]:
import os
import json
from elasticsearch import Elasticsearch, helpers
from dotenv import load_dotenv

from langchain_google_vertexai.embeddings import VertexAIEmbeddings
from langchain_community.vectorstores import ElasticsearchStore
from langchain_core.documents import Document

from google.cloud import storage


In [2]:
# Load settings (by default from a local .env file) into the process environment
# before any of them are read below.
load_dotenv()

# Elasticsearch endpoint taken from ELASTICSEARCH_HOSTS; this is None when the
# variable is not set.
es_host = os.environ.get("ELASTICSEARCH_HOSTS")
es_client = Elasticsearch(hosts=es_host)

In [3]:
# Initialize GCP
# Copy the project-specific GOOGLE_APPLICATION_CREDENTIALS_PATH variable into
# GOOGLE_APPLICATION_CREDENTIALS. A missing source variable raises KeyError here.
credentials_path = os.environ["GOOGLE_APPLICATION_CREDENTIALS_PATH"]
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = str(credentials_path)

# Vertex AI embedding model used by generate_embedding for both indexing and querying.
embeddings_model = VertexAIEmbeddings(
    location=os.getenv("LOCATION"),
    project=os.getenv("PROJECT_ID"),
    model_name="text-multilingual-embedding-002",
)

# Cloud Storage bucket handle; the bucket name comes from GCS_BUCKET_NAME.
GCS_BUCKET = os.environ.get("GCS_BUCKET_NAME")
storage_client = storage.Client()
bucket = storage_client.bucket(GCS_BUCKET)

# Base URL of the Cloud Storage HTTP endpoint.
base_url = "https://storage.googleapis.com"



In [4]:
# Name of the Elasticsearch index used by the cells below.
# Read it without str(): str(None) would quietly turn a missing INDEX_NAME into
# the literal index name "None", and the mistake would only surface later as a
# confusing Elasticsearch error.
index_name = os.getenv("INDEX_NAME")
if not index_name:
    raise RuntimeError("INDEX_NAME environment variable is not set")

# Path of the JSON file whose content is sent as the create-index request body.
mapping_data_format = "./product/new_tile.json"

In [5]:
# Load the index definition that is passed verbatim as the create-index body.
# Decode explicitly as UTF-8 so parsing does not depend on the platform's
# default text encoding.
with open(mapping_data_format, encoding="utf-8") as f:
    mapping = json.load(f)

print(f"\n--- Creating/Updating Index '{index_name}' ---")
if es_client.indices.exists(index=index_name):
    # An existing index is reused as-is; the definition loaded above is NOT
    # applied to it.
    print(f"\nConnected to existing index '{index_name}'.")
else:
    print(f"Creating index '{index_name}' with the specified mapping...")
    try:
        es_client.indices.create(index=index_name, body=mapping)
        print(f"Index '{index_name}' created successfully with mapping.")
    except Exception as e:
        print(f"Error creating index: {e}")
        # Stop the notebook here instead of carrying on. If the indexing cell
        # ran without the index, Elasticsearch would (by default) auto-create it
        # with dynamically inferred field types rather than the mapping from
        # the file.
        raise

# Fetch the mapping now stored on the server and keep it for inspection.
print(f"\n--- Verifying Mapping for '{index_name}' ---")
retrieved_mapping = es_client.indices.get_mapping(index=index_name)


--- Creating/Updating Index 'tiles' ---
Creating index 'tiles' with the specified mapping...
Index 'tiles' created successfully with mapping.

--- Verifying Mapping for 'tiles' ---


In [6]:
def generate_embedding(text: str) -> list:
    """
    Embed ``text`` with the notebook-level ``embeddings_model`` and return the vector.

    Any ``Exception`` raised while embedding is reported on stdout and replaced
    by a list of 768 zeros, so callers always receive a list.
    """
    # Placeholder handed back when the embedding call fails.
    zero_vector = [0.0 for _ in range(768)]
    try:
        # embed_query returns the embedding of a single piece of text.
        return embeddings_model.embed_query(text)
    except Exception as e:
        print(f"Error generating embedding for text '{text}': {e}")
        return zero_vector


In [7]:
bucket_name = GCS_BUCKET
sub_folder = "ai-assistant/textures"

# Text fields concatenated (in this order) into the string that gets embedded.
embedding_fields = ("Surface_applicability", "Species", "Color", "description", "style")

client = storage.Client()
# Force a trailing slash so the prefix only matches objects inside the folder.
prefix = sub_folder if sub_folder.endswith("/") else sub_folder + "/"
blobs = client.list_blobs(bucket_name, prefix=prefix)

# Objects that could not be indexed; reported once the loop has finished.
skipped_blobs = []

for blob in blobs:
    if not blob.name.endswith('.json'):
        continue
    print(blob.name)

    try:
        data = json.loads(blob.download_as_text())
        doc_sku = data["SKU"]
        combined_text = " ".join([data[field] for field in embedding_fields]).strip()
    except (ValueError, KeyError, TypeError) as e:
        # Bad JSON, a missing field or a non-string field: skip this product
        # instead of aborting the whole run with the index half populated.
        print(f"Skipping '{blob.name}': invalid product document ({e!r})")
        skipped_blobs.append(blob.name)
        continue

    embedding = generate_embedding(combined_text)
    if not any(embedding):
        # generate_embedding signals failure with an all-zero vector. Indexing it
        # would overwrite any existing document for this SKU with an embedding
        # that carries no information.
        print(f"Skipping '{blob.name}': embedding generation failed")
        skipped_blobs.append(blob.name)
        continue

    data["embedding"] = embedding
    # The SKU is the document id, so re-running this cell updates documents in
    # place rather than duplicating them.
    es_client.index(index=index_name, id=doc_sku, document=data)

# Make the newly indexed documents visible to the search cells that follow.
es_client.indices.refresh(index=index_name)
print(f"Index '{index_name}' refreshed.")
if skipped_blobs:
    print(f"Skipped {len(skipped_blobs)} object(s): {skipped_blobs}")

ai-assistant/textures/Z21B0Q991BVES001/Z21B0Q991BVES001.json
ai-assistant/textures/Z21B0Q991BVES011/Z21B0Q991BVES011.json
ai-assistant/textures/Z21B0Q991BVES021/Z21B0Q991BVES021.json
ai-assistant/textures/Z21B1D57ANAVI301/Z21B1D57ANAVI301.json
ai-assistant/textures/Z21B1D57APACFD01/Z21B1D57APACFD01.json
ai-assistant/textures/Z21B1Q991LT15811/Z21B1Q991LT15811.json
ai-assistant/textures/Z21B1Q991LT16041/Z21B1Q991LT16041.json
ai-assistant/textures/Z21C2D26CPOLL701/Z21C2D26CPOLL701.json
ai-assistant/textures/Z21C2D26CPOLL711/Z21C2D26CPOLL711.json
ai-assistant/textures/Z21FAA40010821A1/Z21FAA40010821A1.json
ai-assistant/textures/Z21FAA40100090F1/Z21FAA40100090F1.json
ai-assistant/textures/Z21FAA40100303A1/Z21FAA40100303A1.json
ai-assistant/textures/Z21FAA40100308A1/Z21FAA40100308A1.json
ai-assistant/textures/Z21FAA40910296A1/Z21FAA40910296A1.json
ai-assistant/textures/Z21FAA40910533A1/Z21FAA40910533A1.json
ai-assistant/textures/Z21FIA55010327U1/Z21FIA55010327U1.json
ai-assistant/textures/Z2

In [None]:
# response = es_client.search(index=index_name, size=1)  # adjust size as needed
# for hit in response["hits"]["hits"]:
#     print(f"  ID: {hit['_id']} SKU: {hit['_source']['SKU']}, Name: {hit['_source']['Name']} Score: {hit['_score']:.2f}, Description: {hit['_source']['description']}")

In [8]:
# Number of documents currently stored in the index.
count_response = es_client.count(index=index_name)
count = count_response['count']
print(f"data rows: {count}")

# Walk every document in the index with the scan helper and print a one-line
# summary of each; fields absent from a document are shown as None.
for hit in helpers.scan(es_client, index=index_name):
    source = hit['_source']
    summary = (
        f"  ID: {hit['_id']}"
        f" SKU: {source.get('SKU')}"
        f" Name: {source.get('Name')},"
        f" Description: {source.get('description')}"
        f" image_path: {source.get('image_path')}"
    )
    print(summary)

data rows: 110
  ID: Z21B1D57APACFD01 SKU: Z21B1D57APACFD01 Name: GP XP-PACIFIC TEMPO (POL) R/T 60X120 PM, Description: The tile image features a dark blue background with striking gold and white veining, resembling natural marble. The surface appears smooth and polished, enhancing the luxurious feel of the material. The overall impression is one of elegance and sophistication, making it suitable for high-end interior designs. / ภาพกระเบื้องมีพื้นหลังสีน้ำเงินเข้มพร้อมเส้นสายสีทองและสีขาวที่โดดเด่นคล้ายกับหินอ่อนธรรมชาติ พื้นผิวดูเรียบเนียนและขัดเงาเพิ่มความรู้สึกหรูหราให้กับวัสดุ ความประทับใจโดยรวมคือความสง่างามและความซับซ้อนทำให้เหมาะสำหรับการออกแบบตกแต่งภายในระดับไฮเอนด์ image_path: https://storage.googleapis.com/weshape-dev/ai-assistant/textures/Z21B1D57APACFD01/Z21B1D57APACFD01.jpg
  ID: Z21B1Q991LT15811 SKU: Z21B1Q991LT15811 Name: LT 23X152 LIDIA LM-SNE02 (6.0/0.5mm.)IX, Description: The tile image features a dark brown wood grain pattern, giving it a natural and rustic appearanc

### Delete INDEX

In [None]:
# if es_client.indices.exists(index=index_name):
#     es_client.indices.delete(index=index_name)
#     print(f"Index {index_name} deleted.")
# else:
#     print(f"Index {index_name} does not exist.")

### Query test (keyword search and vector search)

In [9]:
# Free-text query and its embedding; the same text drives both halves of the search.
query_text_semantic1 = "กระเบื้องplainสไตล์ Minimalist"
query_vec_semantic2 = generate_embedding(query_text_semantic1)

# Vector half: kNN search over the "embedding" field.
knn_clause = {
    "field": "embedding",
    "query_vector": query_vec_semantic2,
    "k": 5,
    "num_candidates": 10,
}

# Keyword half: the query text matched against several fields at once.
# A "^n" suffix on a field name is that field's boost.
keyword_clause = {
    "multi_match": {
        "query": query_text_semantic1,
        "fields": [
            "description",
            "description.thai",
            "description.standard",
            "style^1.5",
            "Color",
            "Surface_applicability",
            "Species^2",
        ],
        "type": "most_fields",
    }
}

# Only these source fields are returned with each hit.
returned_fields = ["SKU", "Name", "Surface_applicability", "description", "Color", "style", "Species"]

search_body = {
    "knn": knn_clause,
    "query": keyword_clause,
    "size": 5,
    "_source": returned_fields,
}
response = es_client.search(index=index_name, body=search_body)

for hit in response['hits']['hits']:
    source = hit['_source']
    print(f"  ID: {hit['_id']} SKU: {source['SKU']}, Name: {source['Name']} Score: {hit['_score']:.2f}")

  ID: Z21GXA40100021B1 SKU: Z21GXA40100021B1, Name: FT MERLILYN SATIN WHITE 12X12 PM Score: 18.55
  ID: Z21TAA32100101A3 SKU: Z21TAA32100101A3, Name: GRESHAM(MOULD),20x40 (R) (PJ) (C) Score: 18.35
  ID: Z21UXA48100996D1 SKU: Z21UXA48100996D1, Name: FT DECEMBER GREEN 40X40 PM Score: 16.79
  ID: Z21GXA29100147B1 SKU: Z21GXA29100147B1, Name: FT/WT ECO-TERRA BONE 20X20 PM Score: 12.79
  ID: Z21GXA29100173B1 SKU: Z21GXA29100173B1, Name: FT/WT ECO-TERRA GREY 20X20 PM Score: 11.35
