In [None]:
import functools
import pickle

import faiss
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer
from tqdm import tqdm

# Load the sentence-embedding model.
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# Load the product catalogue (could equally be read from a database).
df = pd.read_csv("products.csv")

# Replace NaN with empty strings so missing fields neither break the text
# concatenation below nor end up as NaN in the pickled metadata.
df.fillna("", inplace=True)

# Build the text to embed for each product (tune the recipe per use case).
# Both columns are cast to str first: pandas infers a numeric dtype for
# columns such as all-digit names, and adding ". " to those raises TypeError.
texts = (
    df["name"].astype(str) + ". " + df["short_description"].astype(str)
).tolist()

# Encode every text into one embedding row.
embeddings = model.encode(texts, show_progress_bar=True, convert_to_numpy=True)

# Exact L2 index; vectors are added in DataFrame row order, so the position
# of a vector in the index equals the position of its row in `df`.
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)

# Persist the FAISS index.
faiss.write_index(index, "product_faiss.index")

# Persist the metadata as a list of per-row dicts, in the same order as the
# vectors above, so an index position can be used to look up its product.
metadata = df.to_dict(orient="records")
with open("product_metadata.pkl", "wb") as f:
    pickle.dump(metadata, f)


In [None]:
@functools.lru_cache(maxsize=1)
def _get_embedding_model():
    """Load the query-embedding model once and reuse it across calls."""
    return SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")


def recommend_products(query: str, top_k: int = 5) -> list:
    """Return the metadata of the products most similar to *query*.

    The query is embedded with the same model name used to build the index,
    searched against the FAISS index stored in ``product_faiss.index``, and
    the hits are mapped to the records stored in ``product_metadata.pkl``.

    Args:
        query: Free-text search string.
        top_k: Maximum number of products to return.

    Returns:
        A list of metadata records in the order FAISS returns them. The list
        is shorter than ``top_k`` when the index holds fewer vectors, and
        empty when ``top_k`` is not positive or the index is empty.

    Raises:
        OSError: If ``product_metadata.pkl`` cannot be opened.
        IndexError: If a hit has no matching entry in the metadata list.
    """
    # Nothing to return; bail out before any expensive loading.
    if top_k <= 0:
        return []

    # The index and metadata are re-read on every call so that a freshly
    # rebuilt index is picked up; only the model is cached.
    index = faiss.read_index("product_faiss.index")
    # NOTE(security): unpickling can execute arbitrary code. Only load a
    # metadata file produced by the trusted index-building step.
    with open("product_metadata.pkl", "rb") as f:
        metadata = pickle.load(f)

    # FAISS pads missing results with label -1 when asked for more neighbours
    # than the index contains, so never request more than it holds.
    k = min(top_k, index.ntotal)
    if k == 0:
        return []

    # Embed the query as a single-row matrix.
    query_vector = _get_embedding_model().encode([query], convert_to_numpy=True)

    # Nearest-neighbour search; the distances are not needed here.
    _distances, indices = index.search(query_vector, k)

    # Drop any -1 padding: metadata[-1] would silently return the last
    # product instead of signalling "no result".
    return [metadata[int(i)] for i in indices[0] if i >= 0]
