In [None]:
import sys
import os
import openai
import pandas as pd
from elasticsearch import Elasticsearch
from dotenv import load_dotenv
import random
import mysql.connector
from mysql.connector import Error

# Make the project's 'app' package importable when this notebook/script is run
# from its own directory. The relative paths are resolved against the current
# working directory, so the import below only works when the CWD is as expected.
sys.path.append(os.path.abspath("../../"))
sys.path.append(os.path.abspath("../../../"))  # Adjust the path to include the directory containing the 'app' module

# Must come after the sys.path changes above, otherwise 'app' cannot be found.
from app.config import ELASTICSEARCH_HOST, ELASTICSEARCH_USER, ELASTICSEARCH_PASS

# Load API keys and environment variables
load_dotenv()
# os.getenv returns None when OPENAI_API_KEY is not set; in that case the
# OpenAI calls made later in this file are expected to fail at request time.
openai.api_key = os.getenv("OPENAI_API_KEY")

# Elasticsearch setup: module-level client shared by the index-creation and
# indexing code further down.
# NOTE(review): verify_certs=False appears to disable TLS certificate
# verification -- confirm this is only used against a local/dev cluster.
es = Elasticsearch(
    ELASTICSEARCH_HOST,
    basic_auth=(ELASTICSEARCH_USER, ELASTICSEARCH_PASS),
    verify_certs=False
)

# Read the existing product catalogue from disk; the script cannot continue
# without it, so any load failure terminates the process with exit code 1.
csv_path = "../resources/Products.csv"
try:
    product_inventory = pd.read_csv(csv_path)
    print(f"[DEBUG] Loaded product inventory from {csv_path}")
except Exception as load_error:
    print(f"[ERROR] Failed to load product inventory: {load_error}")
    sys.exit(1)

# Project the frame down to the four fields the rest of the script uses
product_inventory = product_inventory.loc[:, ["name", "price", "description", "category"]]
print(f"[DEBUG] Product inventory columns: {product_inventory.columns.tolist()}")

# Generate new products using OpenAI GPT
def generate_product_record(category):
    """Ask the chat model for one SmartHome product record in ``category``.

    The prompt requests four labelled lines (Product Name, Product Price,
    Category, Description).

    Args:
        category: Product category name interpolated into the prompt.

    Returns:
        The model's reply text with surrounding whitespace stripped, or
        ``None`` if the request or the response handling raises.
    """
    # Built line by line so the exact whitespace of the prompt (including the
    # trailing space after "with description.") is explicit.
    prompt = "\n".join(
        [
            "",
            f"    Generate a product record for a SmartHome product in the category '{category}' with description. ",
            "    The product record should include:",
            "    - Product Name",
            "    - Product Price",
            "    - Category",
            "    - Description (100 words max)",
            "    Provide it in the following format:",
            "     Product Name: ...",
            "     Product Price: ...",
            "     Category: ...",
            "     Description: ...",
            "    ",
        ]
    )
    chat_messages = [
        {"role": "system", "content": "You are a product description generator."},
        {"role": "user", "content": prompt},
    ]

    try:
        print(f"[DEBUG] Sending prompt to GPT for category: {category}")
        completion = openai.ChatCompletion.create(
            model="gpt-4o-mini",
            messages=chat_messages,
            max_tokens=200,
            temperature=0.7,
        )
        first_choice = completion["choices"][0]
        record_text = first_choice["message"]["content"].strip()
        print(f"[DEBUG] GPT response: {record_text}")
        return record_text
    except Exception as err:
        # Best-effort: report the failure and let the caller skip this record.
        print(f"[ERROR] GPT failed to generate product: {err}")
        return None

# Generate 10 new products
categories = ["Smart Doorbells", "Smart Locks", "Smart Speakers", "Smart Lighting", "Smart Thermostats"]
new_products = []

# Maps the labels requested in the GPT prompt to the inventory column names.
_RECORD_FIELDS = {
    "product name": "name",
    "product price": "price",
    "category": "category",
    "description": "description",
}


def _parse_product_record(record_text):
    """Parse a GPT reply of 'Label: value' lines into a product dict.

    Fields are matched by label rather than by line position, so blank lines,
    reordered fields and markdown decoration ('**', '-', trailing spaces) do
    not break parsing. Only the first ':' on a line separates label from
    value, so colons inside the description are preserved.

    Args:
        record_text: Raw text returned by the model.

    Returns:
        Dict with keys 'name', 'price' (float), 'category', 'description'.

    Raises:
        ValueError: If a required field is missing/empty or the price is not
            a number.
    """
    fields = {}
    for line in record_text.splitlines():
        label, separator, value = line.partition(":")
        key = _RECORD_FIELDS.get(label.strip(" \t*-#").lower())
        # Skip lines without a known label; the first occurrence of a label wins.
        if not separator or key is None or key in fields:
            continue
        fields[key] = value.strip(" \t*")

    missing = [key for key in _RECORD_FIELDS.values() if not fields.get(key)]
    if missing:
        raise ValueError(f"missing field(s): {', '.join(missing)}")

    # Accept prices such as "$1,299.99"; float() raises ValueError otherwise.
    price_text = fields["price"].replace("$", "").replace(",", "")
    return {
        "name": fields["name"],
        "price": float(price_text),
        "category": fields["category"],
        "description": fields["description"],
    }


for _ in range(10):
    category = random.choice(categories)
    product_record = generate_product_record(category)
    if not product_record:
        print("[ERROR] Skipping product generation due to GPT error.")
        continue

    try:
        new_products.append(_parse_product_record(product_record))
    except ValueError as e:
        # A malformed reply only costs us this one record, not the whole run.
        print(f"[ERROR] Failed to parse GPT product record: {product_record}. Error: {e}")

print(f"[DEBUG] Generated {len(new_products)} new products.")

# Convert new products to DataFrame
new_products_df = pd.DataFrame(new_products)

# Combine existing and new products
combined_products = pd.concat([product_inventory, new_products_df], ignore_index=True)
print(f"[DEBUG] Combined product count: {len(combined_products)}")

# Generate embeddings using text-embedding-3-small model
def generate_embedding(text):
    """Return the text-embedding-3-small embedding for ``text``.

    Args:
        text: The text to embed.

    Returns:
        The ``embedding`` entry of the first item in the API response, or
        ``None`` if the request or the response handling raises.
    """
    try:
        embedding_response = openai.Embedding.create(
            input=text,
            model="text-embedding-3-small",
        )
        first_item = embedding_response["data"][0]
        vector = first_item["embedding"]
    except Exception as err:
        # Best-effort: the caller skips rows whose embedding is None.
        print(f"[ERROR] Failed to generate embedding for text: {text}. Error: {err}")
        return None
    return vector

# Add embeddings to the combined dataset
combined_products["embedding"] = combined_products["description"].apply(generate_embedding)
print("[DEBUG] Added embeddings to products.")

# Save combined products locally for reference
try:
    combined_products.to_csv("../resources/GeneratedProducts.csv", index=False)
    print("[DEBUG] Combined products saved to GeneratedProducts.csv")
except Exception as e:
    print(f"[ERROR] Failed to save combined products: {e}")

# The dense_vector mapping must match the embedding length exactly, otherwise
# Elasticsearch rejects every document. text-embedding-3-small returns 1536
# dimensions by default (not 768); derive the value from the vectors actually
# generated and fall back to 1536 when none are available.
valid_embeddings = [vec for vec in combined_products["embedding"] if isinstance(vec, list)]
embedding_dims = len(valid_embeddings[0]) if valid_embeddings else 1536

# Ensure Elasticsearch index is created with correct mappings
index_name = "product_records"
if not es.indices.exists(index=index_name):
    es.indices.create(
        index=index_name,
        body={
            "mappings": {
                "properties": {
                    "name": {"type": "text"},
                    "price": {"type": "float"},
                    "category": {"type": "keyword"},
                    "description": {"type": "text"},
                    "embedding": {"type": "dense_vector", "dims": embedding_dims}
                }
            }
        }
    )
    print(f"[DEBUG] Created Elasticsearch index: {index_name}")
else:
    # An index left over from an earlier run keeps its old mapping; dims cannot
    # be changed in place, so surface the mismatch instead of failing per document.
    existing_mappings = es.indices.get_mapping(index=index_name)
    for index_mapping in existing_mappings.values():
        existing_dims = (
            index_mapping.get("mappings", {})
            .get("properties", {})
            .get("embedding", {})
            .get("dims")
        )
        if existing_dims is not None and existing_dims != embedding_dims:
            print(
                f"[WARNING] Index {index_name} maps 'embedding' with dims={existing_dims} "
                f"but embeddings have {embedding_dims} dimensions. "
                "Delete and recreate the index (or reindex) before indexing."
            )

# Store combined products in Elasticsearch
indexed_count = 0
failed_count = 0
skipped_count = 0
for _, row in combined_products.iterrows():
    # Check if embedding is valid
    if row["embedding"] is None:
        print(f"[WARNING] Skipping product due to missing embedding: {row['name']}")
        skipped_count += 1
        continue

    doc = {
        "name": row["name"],
        "price": row["price"],
        "category": row["category"],
        "description": row["description"],
        "embedding": row["embedding"],  # Ensure embedding is a list of floats
    }
    try:
        es.index(index=index_name, body=doc)
        indexed_count += 1
        print(f"[DEBUG] Indexed product: {row['name']}")
    except Exception as e:
        failed_count += 1
        print(f"[ERROR] Failed to index document to Elasticsearch: {row['name']}. Error: {e}")

# Report what actually happened rather than claiming success unconditionally.
if failed_count == 0 and skipped_count == 0:
    print("[INFO] Products and embeddings successfully indexed in Elasticsearch.")
else:
    print(
        f"[WARNING] Indexed {indexed_count} of {len(combined_products)} products in Elasticsearch "
        f"({failed_count} failed, {skipped_count} skipped)."
    )



  _transport = transport_class(


[DEBUG] Loaded product inventory from ../resources/Products.csv
[DEBUG] Product inventory columns: ['name', 'price', 'description', 'category']
[DEBUG] Sending prompt to GPT for category: Smart Locks
[DEBUG] GPT response: Product Name: SecureLock Pro 3000  
Product Price: $199.99  
Category: Smart Locks  
Description: The SecureLock Pro 3000 revolutionizes home security with its advanced biometric technology and smartphone integration. Featuring a sleek, modern design, this smart lock allows you to unlock your door using your fingerprint or mobile app from anywhere. With real-time alerts, you can monitor access and grant temporary codes to guests. Its durable, weather-resistant construction ensures reliability in any condition. Installation is a breeze with our step-by-step guide, and it seamlessly fits most standard doors. Elevate your home security effortlessly with the SecureLock Pro 3000, where safety meets convenience.
[DEBUG] Sending prompt to GPT for category: Smart Thermostats




[ERROR] Failed to index document to Elasticsearch: Blink Video Doorbell. Error: BadRequestError(400, 'document_parsing_exception', "[1:16615] failed to parse: The [dense_vector] field [embedding] in doc [document with id 'mWSbUZMBGboUenIJZATo'] has more dimensions than defined in the mapping [768]")
[ERROR] Failed to index document to Elasticsearch: Tapo TP-Link Smart Doorbell. Error: BadRequestError(400, 'document_parsing_exception', "[1:16616] failed to parse: The [dense_vector] field [embedding] in doc [document with id 'mmSbUZMBGboUenIJZQRu'] has more dimensions than defined in the mapping [768]")
[ERROR] Failed to index document to Elasticsearch: Ring Video Doorbell. Error: BadRequestError(400, 'document_parsing_exception', "[1:16570] failed to parse: The [dense_vector] field [embedding] in doc [document with id 'm2SbUZMBGboUenIJZQSy'] has more dimensions than defined in the mapping [768]")
[ERROR] Failed to index document to Elasticsearch: Google Nest Doorbell. Error: BadRequestE



[ERROR] Failed to index document to Elasticsearch: Keyless Entry Door Lock with Handle. Error: BadRequestError(400, 'document_parsing_exception', "[1:16635] failed to parse: The [dense_vector] field [embedding] in doc [document with id 'nmSbUZMBGboUenIJZgQl'] has more dimensions than defined in the mapping [768]")
[ERROR] Failed to index document to Elasticsearch: Philips Wi-Fi Smart Door Lock. Error: BadRequestError(400, 'document_parsing_exception', "[1:16680] failed to parse: The [dense_vector] field [embedding] in doc [document with id 'n2SbUZMBGboUenIJZgRl'] has more dimensions than defined in the mapping [768]")
[ERROR] Failed to index document to Elasticsearch: Keypad Smart Door Lock. Error: BadRequestError(400, 'document_parsing_exception', "[1:16646] failed to parse: The [dense_vector] field [embedding] in doc [document with id 'oGSbUZMBGboUenIJZgSn'] has more dimensions than defined in the mapping [768]")
[ERROR] Failed to index document to Elasticsearch: eufy Smart Lock C30.



[ERROR] Failed to index document to Elasticsearch: Yale Assure Lock 2 with Wi-Fi. Error: BadRequestError(400, 'document_parsing_exception', "[1:16625] failed to parse: The [dense_vector] field [embedding] in doc [document with id 'omSbUZMBGboUenIJZwQf'] has more dimensions than defined in the mapping [768]")
[ERROR] Failed to index document to Elasticsearch: Keyless Entry Door Lock with Handle. Error: BadRequestError(400, 'document_parsing_exception', "[1:16635] failed to parse: The [dense_vector] field [embedding] in doc [document with id 'o2SbUZMBGboUenIJZwRK'] has more dimensions than defined in the mapping [768]")
[ERROR] Failed to index document to Elasticsearch: Govee Smart Light Bulbs. Error: BadRequestError(400, 'document_parsing_exception', "[1:16608] failed to parse: The [dense_vector] field [embedding] in doc [document with id 'pGSbUZMBGboUenIJZwRo'] has more dimensions than defined in the mapping [768]")
[ERROR] Failed to index document to Elasticsearch: GE CYNC A19 Smart L



[ERROR] Failed to index document to Elasticsearch: NOTABRICK Bluetooth Speakers. Error: BadRequestError(400, 'document_parsing_exception', "[1:16603] failed to parse: The [dense_vector] field [embedding] in doc [document with id 'qmSbUZMBGboUenIJaAQs'] has more dimensions than defined in the mapping [768]")
[ERROR] Failed to index document to Elasticsearch: Google Nest Mini. Error: BadRequestError(400, 'document_parsing_exception', "[1:16540] failed to parse: The [dense_vector] field [embedding] in doc [document with id 'q2SbUZMBGboUenIJaAR6'] has more dimensions than defined in the mapping [768]")




[ERROR] Failed to index document to Elasticsearch: Sonos Era 100. Error: BadRequestError(400, 'document_parsing_exception', "[1:16554] failed to parse: The [dense_vector] field [embedding] in doc [document with id 'rGSbUZMBGboUenIJaQSZ'] has more dimensions than defined in the mapping [768]")
[ERROR] Failed to index document to Elasticsearch: Anker Soundcore 2 Portable Bluetooth Speaker. Error: BadRequestError(400, 'document_parsing_exception', "[1:16600] failed to parse: The [dense_vector] field [embedding] in doc [document with id 'rWSbUZMBGboUenIJaQTw'] has more dimensions than defined in the mapping [768]")




[ERROR] Failed to index document to Elasticsearch: Edifier WiFi Smart Speaker. Error: BadRequestError(400, 'document_parsing_exception', "[1:16541] failed to parse: The [dense_vector] field [embedding] in doc [document with id 'rmSbUZMBGboUenIJawQJ'] has more dimensions than defined in the mapping [768]")
[ERROR] Failed to index document to Elasticsearch: Amazon Smart Thermostat. Error: BadRequestError(400, 'document_parsing_exception', "[1:16554] failed to parse: The [dense_vector] field [embedding] in doc [document with id 'r2SbUZMBGboUenIJbAQE'] has more dimensions than defined in the mapping [768]")
[ERROR] Failed to index document to Elasticsearch: GE CYNC Smart Thermostat. Error: BadRequestError(400, 'document_parsing_exception', "[1:16543] failed to parse: The [dense_vector] field [embedding] in doc [document with id 'sGSbUZMBGboUenIJbAQh'] has more dimensions than defined in the mapping [768]")




[ERROR] Failed to index document to Elasticsearch: Sensi Lite Smart Thermostat. Error: BadRequestError(400, 'document_parsing_exception', "[1:16509] failed to parse: The [dense_vector] field [embedding] in doc [document with id 'sWSbUZMBGboUenIJbARa'] has more dimensions than defined in the mapping [768]")
[ERROR] Failed to index document to Elasticsearch: Google Nest Thermostat. Error: BadRequestError(400, 'document_parsing_exception', "[1:16503] failed to parse: The [dense_vector] field [embedding] in doc [document with id 'smSbUZMBGboUenIJbAS3'] has more dimensions than defined in the mapping [768]")
[ERROR] Failed to index document to Elasticsearch: Honeywell Home RTH8800WF2022. Error: BadRequestError(400, 'document_parsing_exception', "[1:16526] failed to parse: The [dense_vector] field [embedding] in doc [document with id 's2SbUZMBGboUenIJbATQ'] has more dimensions than defined in the mapping [768]")
[ERROR] Failed to index document to Elasticsearch: Vine Thermostat. Error: BadRe



[ERROR] Failed to index document to Elasticsearch: EchoSphere Smart Speaker  . Error: BadRequestError(400, 'document_parsing_exception', "[1:17044] failed to parse: The [dense_vector] field [embedding] in doc [document with id 't2SbUZMBGboUenIJbQRV'] has more dimensions than defined in the mapping [768]")
[ERROR] Failed to index document to Elasticsearch: SmartView Video Doorbell  . Error: BadRequestError(400, 'document_parsing_exception', "[1:17031] failed to parse: The [dense_vector] field [embedding] in doc [document with id 'uGSbUZMBGboUenIJbQSq'] has more dimensions than defined in the mapping [768]")
[ERROR] Failed to index document to Elasticsearch: EcoSmart Thermostat Pro  . Error: BadRequestError(400, 'document_parsing_exception', "[1:17030] failed to parse: The [dense_vector] field [embedding] in doc [document with id 'uWSbUZMBGboUenIJbQTc'] has more dimensions than defined in the mapping [768]")
[ERROR] Failed to index document to Elasticsearch: SecureSmart Pro Lock  . Error



[ERROR] Failed to index document to Elasticsearch: EchoSphere 3000  . Error: BadRequestError(400, 'document_parsing_exception', "[1:17044] failed to parse: The [dense_vector] field [embedding] in doc [document with id 'vGSbUZMBGboUenIJbgRA'] has more dimensions than defined in the mapping [768]")
[ERROR] Failed to index document to Elasticsearch: Ring Video Doorbell Pro 2  . Error: BadRequestError(400, 'document_parsing_exception', "[1:17030] failed to parse: The [dense_vector] field [embedding] in doc [document with id 'vWSbUZMBGboUenIJbgR8'] has more dimensions than defined in the mapping [768]")
[ERROR] Failed to index document to Elasticsearch: Lumos Smart LED Light Bulb  . Error: BadRequestError(400, 'document_parsing_exception', "[1:17033] failed to parse: The [dense_vector] field [embedding] in doc [document with id 'vmSbUZMBGboUenIJbgST'] has more dimensions than defined in the mapping [768]")
[INFO] Products and embeddings successfully indexed in Elasticsearch.


