In [None]:
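# One-time setup: uncomment the lines below to install the dependencies
# into the environment of the running kernel.
# %pip install sentence-transformers faiss-cpu
# %pip install hf_xet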

In [1]:
import faiss
import json
import numpy as np
from sentence_transformers import SentenceTransformer

In [2]:
# Sample product records (a Python list of dicts in a JSON-like shape).
# Every record carries the same seven keys: title, tags, content_type,
# source_url, availability, description and review_summary.
# title, description and review_summary are the fields that get embedded for
# search; the full record is what gets written to the metadata file.
data = [
    {
        "title": "Samsung Galaxy A16 5G",
        "tags": ["phone", "Samsung", "5G", "prepaid", "promotion", "out-of-stock"],
        "content_type": "product_listing",
        "source_url": "https://www.straighttalk.com/shop/all-phones",
        "availability": "out_of_stock",
        "description": "The Samsung Galaxy A16 5G might be available as a promotional offer with select plans, such as a Gold Unlimited Plan or higher. In some cases, this device has been offered for free when bundled with qualifying plans. The full retail price appears to be approximately $99.99. Pricing through financing options like Smartpay may vary depending on credit.",
        "review_summary": "This product currently appears to have no customer reviews. It is likely that further customer feedback could provide more insight into the device's performance and reliability."
    },
    {
        "title": "moto g power 5G - 2024",
        "tags": ["phone", "Motorola", "5G", "prepaid", "promotion", "out-of-stock"],
        "content_type": "product_listing",
        "source_url": "https://www.straighttalk.com/shop/all-phones",
        "availability": "out_of_stock",
        "description": "The 2024 moto g power 5G may have been offered at a discounted price—possibly reduced from $149.99 to $99.99. Customers with excellent credit might be eligible for monthly payments through Smartpay over a 24-month term. Free availability might apply when bundled with higher-tier plans.",
        "review_summary": "There are currently no reviews for this model, suggesting limited customer feedback so far."
    },
    {
        "title": "moto g play - 2024",
        "tags": ["phone", "Motorola", "prepaid", "promotion", "budget", "out-of-stock"],
        "content_type": "product_listing",
        "source_url": "https://www.straighttalk.com/shop/all-phones",
        "availability": "out_of_stock",
        "description": "This device may have been reduced in price from around $39.99 to $29.99, making it one of the more affordable models. Payment plans might be available depending on credit status. Promotional offers could apply in combination with select monthly plans.",
        "review_summary": "This product currently lacks customer reviews, which could indicate it's newly listed or hasn't been widely purchased yet."
    }
]


In [3]:
# Load the sentence-embedding model used for both documents and queries.
model = SentenceTransformer('all-mpnet-base-v2')

# Build one searchable text per product: title, description and review
# summary joined by newlines, in the same order as `data`.
documents = [
    f"{item['title']}\n{item['description']}\n{item['review_summary']}"
    for item in data
]

# FAISS expects a C-contiguous float32 matrix of shape (n_documents, dim).
# Normalising the layout once here makes that requirement explicit and avoids
# the extra copy that a plain np.array(...) call would make at add() time.
embeddings = np.ascontiguousarray(model.encode(documents), dtype=np.float32)

# Create the FAISS index; the vector dimension is the matrix's column count.
dim = embeddings.shape[1]
index = faiss.IndexFlatL2(dim)  # L2 distance index
index.add(embeddings)

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [6]:
# The FAISS index holds only vectors, so the product fields are kept in a
# parallel list: position i in metadata_store matches row i of the index
# because both are built from `data` in the same order.
metadata_store = [
    {
        "title": item["title"],
        "tags": item["tags"],
        "content_type": item["content_type"],
        "source_url": item["source_url"],
        "availability": item["availability"],
        "description": item["description"],
        "review_summary": item["review_summary"],
    }
    for item in data
]

# Persist both artifacts: the index in FAISS's own format, the metadata as JSON.
faiss.write_index(index, "products.index")
with open("products_metadata.json", "w") as metadata_file:
    json.dump(metadata_store, metadata_file, indent=2)

print("FAISS index and metadata saved.")


FAISS index and metadata saved.


In [7]:
# Read the saved index and its companion metadata list back from disk,
# rebinding the in-memory `index` and `metadata_store` to the persisted copies.
index = faiss.read_index("products.index")
with open("products_metadata.json", "r") as metadata_file:
    metadata_store = json.loads(metadata_file.read())

# Query function
def search_products(query: str, top_k: int = 3) -> list:
    """Return the metadata records of the products closest to ``query``.

    The query is embedded with the notebook-level ``model`` and looked up in
    the notebook-level FAISS ``index``. Each id returned by the index is used
    as a position in ``metadata_store``.

    Args:
        query: Free-text search string.
        top_k: Number of neighbours to request from the index.

    Returns:
        A list of metadata dicts in the order the index returned them. It may
        hold fewer than ``top_k`` entries when the index has fewer vectors
        than requested or an id has no matching metadata record.
    """
    # FAISS takes a float32 matrix with one row per query vector.
    query_embedding = np.asarray(model.encode([query]), dtype=np.float32)
    _, neighbor_ids = index.search(query_embedding, top_k)

    # FAISS pads the id array with -1 when it finds fewer than top_k
    # neighbours. A negative id must be rejected explicitly, otherwise Python's
    # negative indexing would silently return the last metadata record.
    return [
        metadata_store[int(idx)]
        for idx in neighbor_ids[0]
        if 0 <= idx < len(metadata_store)
    ]

# Run a sample search against the reloaded index.
query = "how can i get moto g for free?"
results = search_products(query)

# Show each hit as a numbered block, one field per line.
for rank, hit in enumerate(results, start=1):
    print(
        f"\nResult {rank}:",
        f"Title: {hit['title']}",
        f"Tags: {hit['tags']}",
        f"Availability: {hit['availability']}",
        f"URL: {hit['source_url']}",
        f"Description: {hit['description']}",
        f"Review Summary: {hit['review_summary']}",
        sep="\n",
    )


Result 1:
Title: moto g power 5G - 2024
Tags: ['phone', 'Motorola', '5G', 'prepaid', 'promotion', 'out-of-stock']
Availability: out_of_stock
URL: https://www.straighttalk.com/shop/all-phones
Description: The 2024 moto g power 5G may have been offered at a discounted price—possibly reduced from $149.99 to $99.99. Customers with excellent credit might be eligible for monthly payments through Smartpay over a 24-month term. Free availability might apply when bundled with higher-tier plans.
Review Summary: There are currently no reviews for this model, suggesting limited customer feedback so far.

Result 2:
Title: moto g play - 2024
Tags: ['phone', 'Motorola', 'prepaid', 'promotion', 'budget', 'out-of-stock']
Availability: out_of_stock
URL: https://www.straighttalk.com/shop/all-phones
Description: This device may have been reduced in price from around $39.99 to $29.99, making it one of the more affordable models. Payment plans might be available depending on credit status. Promotional offe