In [25]:
import pandas as pd
import numpy as np
import joblib
import tensorflow as tf
from scipy.sparse import load_npz
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# 1. Load the model and the data
# Keras model restored from an HDF5 (.h5) file.
model = tf.keras.models.load_model("models/model_final2.h5")
# NOTE(review): joblib.load unpickles the file, and unpickling can execute
# arbitrary code -- only load a vectorizer.pkl that comes from a trusted source.
vectorizer = joblib.load("vectorizer.pkl")
# Sparse matrix stored in SciPy's .npz format; presumably the TF-IDF matrix
# produced by `vectorizer` over the product names -- TODO confirm.
tfidf_matrix = load_npz("tfidf_matrix.npz")
# Raw product table.
df_produk = pd.read_csv("all_dataset.csv")

In [None]:
# 2. Build the de-duplicated product table
# Keep only the columns used downstream and drop rows that are exact
# duplicates across all five of them.
df_unique = (
    df_produk
    .loc[:, ['product_id', 'product_name', 'seller_id', 'product_rating', 'product_price']]
    .drop_duplicates()
)

In [None]:
# 3. Assign a category
def assign_category(pid):
    """Return the category name implied by the first character of a product id.

    The id is converted with ``str()`` first, so numeric ids work as well.

    Args:
        pid: Product id (any value accepted by ``str()``).

    Returns:
        'alat_masak' when the id starts with '1', 'kamera' for '2',
        'buku' for '3', and 'lainnya' for anything else.
    """
    prefix_to_category = {
        '1': 'alat_masak',
        '2': 'kamera',
        '3': 'buku',
    }
    # Slicing (instead of indexing) keeps an empty id from raising; it simply
    # falls through to the default category.
    leading_char = str(pid)[:1]
    return prefix_to_category.get(leading_char, 'lainnya')

# Derive each row's category name from its product id.
df_unique['category'] = df_unique['product_id'].map(assign_category)

In [None]:
# 4. Text preprocessing
# Lower-case the product names, then turn non-breaking spaces (U+00A0) and
# hair spaces (U+200A) into ordinary spaces.
df_unique['product_name2'] = (
    df_unique['product_name']
    .str.lower()
    .str.replace('\xa0', ' ')
    .str.replace('\u200a', ' ')
)
# Plain Python list of the cleaned names, in row order.
corpus = df_unique['product_name2'].tolist()

In [None]:
# 5. Map category names to integer labels
# Labels follow the listed order: alat_masak -> 0, kamera -> 1, buku -> 2.
kategori_map = {
    nama: label
    for label, nama in enumerate(['alat_masak', 'kamera', 'buku'])
}
# Categories missing from the map (e.g. 'lainnya') become NaN after .map()
# and are stored as the sentinel label -1.
df_unique['category_label'] = (
    df_unique['category']
    .map(kategori_map)
    .fillna(-1)
    .astype(int)
)

In [None]:
# 6. Count the categories
# Number of entries in kategori_map (the 'lainnya' fallback is not one of
# them). recommend_with_nn reads this as a module-level global.
num_categories = len(kategori_map)

In [None]:
# 7. Recommendation function
def recommend_with_nn(keyword, df_produk, vectorizer, tfidf_matrix_produk, model, top_n=30,
                      n_candidates=50, n_categories=None):
    """Recommend products for a search keyword.

    The keyword is lower-cased and vectorised with ``vectorizer``; the
    ``n_candidates`` rows of ``tfidf_matrix_produk`` most cosine-similar to it
    form the candidate pool. Every candidate with a valid category label is
    scored by ``model`` on the concatenation of its one-hot category and its
    dense TF-IDF row, and the candidates are returned ordered by that score,
    highest first.

    The model input does not contain the query: the query only decides which
    rows become candidates, not how they are ranked among each other.

    Args:
        keyword: Search text entered by the user.
        df_produk: Product table with at least ``product_name`` and
            ``category_label`` columns; ``seller_id`` is optional.
            Row ``i`` (positional) is assumed to describe the same product as
            row ``i`` of ``tfidf_matrix_produk`` -- TODO confirm against the
            code that built the matrix.
        vectorizer: Object whose ``transform`` turns a list of strings into
            vectors comparable with ``tfidf_matrix_produk``.
        tfidf_matrix_produk: SciPy sparse matrix, one row per product.
        model: Object whose ``predict(x, verbose=0)`` returns a 2-D array;
            column 0 is used as the score.
        top_n: Maximum number of rows to return.
        n_candidates: Size of the cosine-similarity candidate pool.
        n_categories: Width of the one-hot category vector. ``None`` falls
            back to the module-level ``num_categories``.

    Returns:
        DataFrame with columns ``product_name`` and ``seller_id`` (at most
        ``top_n`` rows). It is empty when no candidate has a label in
        ``[0, n_categories)``.
    """
    result_columns = ['product_name', 'seller_id']
    n_cat = num_categories if n_categories is None else n_categories

    query_vec = vectorizer.transform([keyword.lower()])
    cosine_sim = cosine_similarity(query_vec, tfidf_matrix_produk).flatten()

    # Most similar first. Slicing after the reversal also copes with
    # n_candidates == 0, where a [-0:] slice would select every row.
    pool_size = max(int(n_candidates), 0)
    candidate_indices = cosine_sim.argsort()[::-1][:pool_size]

    # Keep only candidates whose label fits into the one-hot vector
    # (the sentinel -1 marks products outside the known categories).
    labels = df_produk['category_label'].to_numpy()[candidate_indices]
    valid = (labels >= 0) & (labels < n_cat)
    if not valid.any():
        # Without this guard the sort below raises KeyError: an empty frame
        # has no 'score' column.
        return pd.DataFrame(columns=result_columns)

    valid_indices = candidate_indices[valid]
    valid_labels = labels[valid].astype(int)
    candidates = df_produk.iloc[valid_indices]

    # One batched feature matrix: [one-hot category | dense TF-IDF row].
    category_one_hot = np.zeros((len(valid_labels), n_cat))
    category_one_hot[np.arange(len(valid_labels)), valid_labels] = 1
    produk_tfidf = tfidf_matrix_produk.tocsr()[valid_indices].toarray()
    x_input = np.hstack([category_one_hot, produk_tfidf])

    # A single predict call for all candidates instead of one call per row.
    scores = np.asarray(model.predict(x_input, verbose=0))[:, 0]

    if 'seller_id' in candidates.columns:
        seller_id = candidates['seller_id'].to_numpy()
    else:
        seller_id = 'N/A'

    df_hasil = pd.DataFrame({
        'product_name': candidates['product_name'].to_numpy(),
        'seller_id': seller_id,
        'score': scores,
    })
    df_hasil_sorted = df_hasil.sort_values(by='score', ascending=False).drop(columns=['score'])
    return df_hasil_sorted.head(top_n)

In [None]:
# 8. Run a query
# Ask for a search term and rank the de-duplicated products against it.
query = input("Cari produk: ")
hasil_rekomendasi = recommend_with_nn(
    keyword=query,
    df_produk=df_unique,
    vectorizer=vectorizer,
    tfidf_matrix_produk=tfidf_matrix,
    model=model,
)

print(hasil_rekomendasi)

                                         product_name  seller_id
38  Sony FE 70-200mm f4 Macro G OSS II Lens Sony F...         36
40  Sony FE 70-200mm f4 Macro G OSS II Lens Sony F...         97
33  Sony FE 70-200mm f4 Macro G OSS II Lens Sony F...        196
34  Sony FE 70-200mm f4 Macro G OSS II Lens Sony F...         51
35  Sony FE 70-200mm f4 Macro G OSS II Lens Sony F...        127
36  Sony FE 70-200mm f4 Macro G OSS II Lens Sony F...         59
37  Sony FE 70-200mm f4 Macro G OSS II Lens Sony F...         96
39  Sony FE 70-200mm f4 Macro G OSS II Lens Sony F...         83
0   Sony ZV-1 II Vlogging Camera Sony ZV1 II Sony ...        133
1   Sony ZV-1 II Vlogging Camera Sony ZV1 II Sony ...         95
2   Sony ZV-1 II Vlogging Camera Sony ZV1 II Sony ...        172
3   Sony ZV-1 II Vlogging Camera Sony ZV1 II Sony ...         40
4   Sony ZV-1 II Vlogging Camera Sony ZV1 II Sony ...         31
10  Sony A7IV Sony a74 Sony A7 IV Mirrorless Camer...        154
5   Sony A7IV Sony a74 So