# OpenAI - A partir do input de um usuário, encontra o produto mais relevante

In [3]:
import os
from dotenv import load_dotenv
import openai
import pandas as pd
import numpy as np
import ast
from sklearn.metrics.pairwise import cosine_distances
from tqdm.auto import tqdm

# 1) API key
# load_dotenv() is called first so that a key defined in a .env file is
# visible to os.getenv(). The key is then handed to the openai module, and the
# notebook fails fast with a clear error when the key is missing or empty,
# before any API request is attempted.
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")
if not openai.api_key:
    raise ValueError("OPENAI_API_KEY não encontrada no .env")

# 2) Read the CSV
# The path is relative, so the file must sit in the notebook's working
# directory. The embedding columns are parsed from strings further down, so
# they are presumably stored as stringified lists — confirm against the file.
df = pd.read_csv('OpenAI_embeddings.csv')

# 3) Embedding columns: one "embedding_<field>" column per embedded text field
emb_cols = [
    f"embedding_{field}"
    for field in (
        "product_name",
        "category",
        "about_product",
        "review_title",
        "review_content",
    )
]

# 4) Parse the stringified embeddings (string -> np.array) and average them per row
def average_embeddings(row, cols=None):
    """Return the element-wise mean of the embeddings stored in one row.

    Parameters
    ----------
    row : mapping-like
        Indexed by column name (e.g. the ``pd.Series`` that
        ``DataFrame.apply(..., axis=1)`` passes in). Each embedding cell is
        expected to hold the string form of a list of numbers.
    cols : iterable of str, optional
        Columns to average. Defaults to the module-level ``emb_cols``.

    Returns
    -------
    np.ndarray
        1-D float array: the mean over every column whose cell is a non-blank
        string. Presumably 1536-dimensional for this dataset — confirm
        against the model used to build the CSV.

    Raises
    ------
    ValueError
        If no selected column holds a non-blank string. Without this check
        ``np.mean([])`` yields a scalar NaN (plus a RuntimeWarning) and the
        failure only surfaces later as an opaque shape error in ``np.vstack``.
    """
    if cols is None:
        cols = emb_cols

    embs = []
    for col in cols:
        s = row[col]
        # Missing cells come through as NaN (a float), so only non-blank
        # strings are parsed; everything else is skipped.
        if isinstance(s, str) and s.strip():
            embs.append(np.array(ast.literal_eval(s), dtype=float))

    if not embs:
        row_label = getattr(row, "name", None)
        raise ValueError(
            f"Nenhum embedding válido encontrado na linha {row_label!r} "
            f"(colunas: {list(cols)})"
        )

    # Collapse the per-column vectors into a single vector for the row.
    return np.mean(embs, axis=0)

# Register tqdm's progress_apply on pandas objects, reduce every row to a
# single averaged vector, and keep the result both as a DataFrame column and
# as one stacked matrix for the distance computation.
tqdm.pandas(desc="Averaging embeddings")
row_vectors = df.progress_apply(average_embeddings, axis=1)
df['emb_avg'] = row_vectors
emb_matrix = np.vstack(df['emb_avg'].values)  # one row per product row

# 5) Build the embedding for the search query
def get_query_embedding(text: str, model: str = "text-embedding-3-small"):
    """Embed ``text`` through the OpenAI embeddings endpoint.

    Parameters
    ----------
    text : str
        Search text; must contain at least one non-whitespace character.
    model : str
        Name of the embedding model to request.

    Returns
    -------
    np.ndarray
        1-D float array built from the first embedding in the response.

    Raises
    ------
    ValueError
        If ``text`` is falsy or contains only whitespace.
    """
    # Reject blank input before spending an API call on it.
    if not (text and text.strip()):
        raise ValueError("Texto de busca vazio")

    response = openai.embeddings.create(input=text, model=model)
    vector = response.data[0].embedding
    return np.array(vector, dtype=float)

# 6) Semantic relevance search
def semantic_search(query_text: str, top_k: int = 3):
    """Return the ``top_k`` catalogue rows closest to ``query_text``.

    The query is embedded with ``get_query_embedding`` and compared against
    the module-level ``emb_matrix`` using cosine distance; smaller distance
    means more relevant.

    Parameters
    ----------
    query_text : str
        Free-text search query.
    top_k : int
        Number of results to return; must be non-negative.

    Returns
    -------
    pd.DataFrame
        ``product_id`` and ``product_name`` of the nearest rows of ``df``,
        ordered from nearest to farthest, with a fresh 0..k-1 index.

    Raises
    ------
    ValueError
        If ``top_k`` is negative, or if the query text is blank (raised by
        ``get_query_embedding``).
    """
    # A negative top_k would make the [:top_k] slice below drop rows from the
    # far end instead of limiting the result, silently returning almost the
    # whole catalogue. Reject it before spending an API call.
    if top_k < 0:
        raise ValueError(f"top_k deve ser >= 0, recebido {top_k}")

    q_emb = get_query_embedding(query_text).reshape(1, -1)  # single-row 2-D array
    dists = cosine_distances(q_emb, emb_matrix)[0]
    nearest = np.argsort(dists)[:top_k]
    return df.iloc[nearest][['product_id', 'product_name']].reset_index(drop=True)

Averaging embeddings:   0%|          | 0/1465 [00:00<?, ?it/s]

In [9]:
# 7) Example usage
# Widen pandas' text rendering so long product names are printed in full
# instead of being truncated.
pd.set_option('display.width', 200)
pd.set_option('display.max_colwidth', None)

query = "mouse for playing videogames for long periods of time"
top3 = semantic_search(query, top_k=3)
print(top3)

   product_id  \
0  B00NFD0ETQ   
1  B08D64C9FN   
2  B073BRXPZX   

                                                                                                                                                                                            product_name  
0                                                                      Logitech G402 Hyperion Fury USB Wired Gaming Mouse, 4,000 DPI, Lightweight, 8 Programmable Buttons, Compatible for PC/Mac - Black  
1                                                                                                                  Ant Esports GM320 RGB Optical Wired Gaming Mouse | 8 Programmable Buttons | 12800 DPI  
2  Lenovo 300 Wired Plug & Play USB Mouse, High Resolution 1600 DPI Optical Sensor, 3-Button Design with clickable Scroll Wheel, Ambidextrous, Ergonomic Mouse for Comfortable All-Day Grip (GX30M39704)  
