In [1]:
import os
import json
from elasticsearch import Elasticsearch, helpers
from dotenv import load_dotenv

from langchain_google_vertexai.embeddings import VertexAIEmbeddings
from langchain_community.vectorstores import ElasticsearchStore
from langchain_core.documents import Document


In [2]:
load_dotenv()

es_host = os.getenv("ELASTICSEARCH_HOSTS")
es_client = Elasticsearch(es_host)

In [3]:
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = str(os.environ["GOOGLE_APPLICATION_CREDENTIALS_PATH"])

embeddings_model = VertexAIEmbeddings(
    model_name="text-multilingual-embedding-002",
    project=os.getenv("PROJECT_ID"),
    location=os.getenv("LOCATION"),
)



In [4]:
index_name = str(os.getenv("INDEX_NAME"))
dataset_folder = "./datasets"
mapping_data_format = "./product/new_tile.json"

In [5]:
with open(mapping_data_format) as f:
    mapping = json.load(f)

print(f"\n--- Creating/Updating Index '{index_name}' ---")
if es_client.indices.exists(index=index_name):
    print(f"\nConnected to existing index '{index_name}'.")
else:
    print(f"Creating index '{index_name}' with the specified mapping...")
    try:
        es_client.indices.create(index=index_name, body=mapping)
        print(f"Index '{index_name}' created successfully with mapping.")
    except Exception as e:
        print(f"Error creating index: {e}")

# # Verify the created mapping
print(f"\n--- Verifying Mapping for '{index_name}' ---")
retrieved_mapping = es_client.indices.get_mapping(index=index_name)


--- Creating/Updating Index 'tiles' ---
Creating index 'tiles' with the specified mapping...
Index 'tiles' created successfully with mapping.

--- Verifying Mapping for 'tiles' ---


In [6]:
def generate_embedding(text: str) -> list:
    """
    Generates a dense vector embedding for a given text using VertexAIEmbeddings.
    """
    try:
        # VertexAIEmbeddings.embed_query returns a list of floats (the vector)
        embedding_vector = embeddings_model.embed_query(text)
        return embedding_vector
    except Exception as e:
        print(f"Error generating embedding for text '{text}': {e}")
        return [0.0] * 768 # Return a zero vector or handle as appropriate for error


In [7]:
json_folder = dataset_folder
for filename in os.listdir(json_folder):
    if filename.endswith('.json'):
        #filejson
        json_path = os.path.join(json_folder, filename)
        try:
            with open(json_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
                combined_text = " ".join([data["Surface_applicability"],
                                          data["Species"],
                                          data["Color"],
                                          data["Groove_size"],
                                          data["description"],
                                          data["style"],]).strip()
                data["embedding"] = generate_embedding(combined_text)
                # print(data["embedding"])
                es_client.index(index=index_name, document=data)
        except Exception as e:
            print(e)
es_client.indices.refresh(index=index_name)
print(f"Index '{index_name}' refreshed.")

Index 'tiles' refreshed.


In [8]:
# response = es_client.search(index=index_name, size=1)  # adjust size as needed
# for hit in response["hits"]["hits"]:
#     print(f"  ID: {hit['_id']} SKU: {hit['_source']['SKU']}, Name: {hit['_source']['Name']} Score: {hit['_score']:.2f}, Description: {hit['_source']['description']}")

In [9]:
count = es_client.count(index=index_name)['count']
print(f"data rows: {count}")

for hit in helpers.scan(es_client, index=index_name):
    print(f"  ID: {hit['_id']} SKU: {hit['_source'].get('SKU')} Name: {hit['_source'].get('Name')}, Description: {hit['_source'].get('description')}")

data rows: 30
  ID: K9wwyZcBZASVAhmA1eF3 SKU: Z21GXA40100147C1 Name: FT VILLE (II) BEIGE 12x12 PM, Description: The tile features a soft, creamy beige color with a subtle, slightly textured surface. It appears to have a matte finish, giving it a natural and understated look. The overall impression is one of warmth and simplicity, making it suitable for various interior applications./ กระเบื้องมีสีเบจอ่อนนุ่มและมีพื้นผิวที่ละเอียดอ่อนเล็กน้อย ดูเหมือนว่าจะมีผิวเคลือบด้านทำให้ดูเป็นธรรมชาติและเรียบง่าย ความประทับใจโดยรวมคือความอบอุ่นและความเรียบง่ายทำให้เหมาะสำหรับการใช้งานภายในที่หลากหลาย
  ID: LNwwyZcBZASVAhmA1-EF SKU: Z21GXB31010181C1 Name: WT BRICKA(II) WHITE 8X12 PM, Description: The tile is white with a subtle, textured surface featuring a linear pattern that adds depth without being overwhelming. The rectangular shape and clean lines give it a modern and versatile look. / กระเบื้องสีขาวที่มีพื้นผิวเป็นลายนูนเล็กน้อย มีลวดลายเส้นตรงที่เพิ่มความลึกโดยไม่ทำให้ดูลายตา รูปทรงสี่เหลี่ยม

### Delete INDEX

In [10]:
# if es_client.indices.exists(index=index_name):
#     es_client.indices.delete(index=index_name)
#     print(f"Index {index_name} deleted.")
# else:
#     print(f"Index {index_name} does not exist.")

### Query test (keyword search and vector search)

In [12]:
query_text_semantic1 = "กระเบื้องplainสไตล์ Minimalist"
query_vec_semantic2 = generate_embedding(query_text_semantic1)

response = es_client.search(index=index_name, body={
    "knn": {
    "field": "embedding",
    "query_vector": query_vec_semantic2,
    "k": 5,
    "num_candidates": 10,
    },
    "query": {
        "multi_match": {
            "query": query_text_semantic1,
            "fields": ["description", 
                       "description.thai", 
                       "description.standard",
                       "style^1.5",
                       "Color",
                       "Surface_applicability",
                       "Species^2"],
            "type": "most_fields"
        }
    },
    "size": 5,
    "_source": ["SKU","Name","Surface_applicability","description", "Color","style","Species"]
})
for hit in response['hits']['hits']:
    print(f"  ID: {hit['_id']} SKU: {hit['_source']['SKU']}, Name: {hit['_source']['Name']} Score: {hit['_score']:.2f}")

  ID: J9wwyZcBZASVAhmAzOEJ SKU: Z21GXA40100021B1, Name: FT MERLILYN SATIN WHITE 12X12 PM Score: 14.18
  ID: ItwwyZcBZASVAhmAv-GI SKU: Z21GXA29100147B1, Name: FT/WT ECO-TERRA BONE 20X20 PM Score: 8.39
  ID: JNwwyZcBZASVAhmAxOFn SKU: Z21GXA29100173B1, Name: FT/WT ECO-TERRA GREY 20X20 PM Score: 7.40
  ID: MtwwyZcBZASVAhmA3-FF SKU: Z21UXA48100009A1, Name: FT LATTICE CHARCOAL GREY 16x16 PM Score: 6.92
  ID: KtwwyZcBZASVAhmA0uF1 SKU: Z21GXA40100146C1, Name: FT VILLE (II) IVORY 12x12 PM Score: 5.20
