In [14]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder

In [15]:
data = pd.read_csv('datasetfinal.csv')
data

Unnamed: 0,seller_id,product_name,product_id,buyer_id,product_rating,product_price
0,457,40 Set Menu Sehari-hari Hits di Instagram ala ...,30070,1504,4.9,142800
1,457,7 Hari Belajar Drone Photography (Edisi Revisi),30079,1435,5.0,102000
2,457,Akasha : City Hunter - Complete Edition 03,30121,1560,4.9,49300
3,457,AKASHA : FATE/APOCRYPHA 02,30163,271,4.8,38250
4,457,AKASHA : RECORD OF RAGNAROK 03,30219,6,5.0,38250
...,...,...,...,...,...,...
49995,105,Samyang V-AF 24mm T1.9 Lens for Sony FE Samyan...,23658,47,5.0,8979000
49996,135,Godox S60 LED Focusing 3-Light Kit,29682,132,5.0,18849000
49997,134,Memory 128GB SF-M Tough Series UHS-II SDXC Mem...,28802,250,5.0,1599000
49998,4,Insta360 One X3 Sticky Lensguard,20253,248,5.0,539000


Pada data di atas, terdapat beberapa row data yang tidak memiliki rating, maka dari itu dilakukan preprocessing dengan menghapus row yang mengandung no rating

In [16]:
data = data[data['product_rating'] != 'No Rating']
data

Unnamed: 0,seller_id,product_name,product_id,buyer_id,product_rating,product_price
0,457,40 Set Menu Sehari-hari Hits di Instagram ala ...,30070,1504,4.9,142800
1,457,7 Hari Belajar Drone Photography (Edisi Revisi),30079,1435,5.0,102000
2,457,Akasha : City Hunter - Complete Edition 03,30121,1560,4.9,49300
3,457,AKASHA : FATE/APOCRYPHA 02,30163,271,4.8,38250
4,457,AKASHA : RECORD OF RAGNAROK 03,30219,6,5.0,38250
...,...,...,...,...,...,...
49995,105,Samyang V-AF 24mm T1.9 Lens for Sony FE Samyan...,23658,47,5.0,8979000
49996,135,Godox S60 LED Focusing 3-Light Kit,29682,132,5.0,18849000
49997,134,Memory 128GB SF-M Tough Series UHS-II SDXC Mem...,28802,250,5.0,1599000
49998,4,Insta360 One X3 Sticky Lensguard,20253,248,5.0,539000


In [17]:
data['product_rating'] = data['product_rating'].astype(float)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['product_rating'] = data['product_rating'].astype(float)


In [18]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 49539 entries, 0 to 49999
Data columns (total 6 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   seller_id       49539 non-null  int64  
 1   product_name    49539 non-null  object 
 2   product_id      49539 non-null  int64  
 3   buyer_id        49539 non-null  int64  
 4   product_rating  49539 non-null  float64
 5   product_price   49539 non-null  int64  
dtypes: float64(1), int64(4), object(1)
memory usage: 2.6+ MB


In [20]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

def user_recommendation(user_id, data, user_embeddings, product_encoder, top_n=10, min_rating=4.0):
    """
    Memberikan rekomendasi produk untuk user tertentu berdasarkan kemiripan pengguna lain (user-based CF).

    Parameter:
    - user_id: ID user hasil dari LabelEncoder (bukan buyer_id asli)
    - data: DataFrame yang telah memiliki kolom 'user', 'product', 'product_rating', 'product_id', 'product_name'
    - user_embeddings: matriks embedding pengguna (dari model)
    - product_encoder: encoder untuk produk
    - top_n: jumlah rekomendasi produk
    - min_rating: threshold rating yang dianggap sebagai "tinggi"

    Output:
    - Print daftar rekomendasi
    """

    if user_id >= len(user_embeddings):
        print(f"User ID {user_id} tidak valid.")
        return

    # Cosine Similarity antar pengguna
    target_user_embedding = user_embeddings[user_id]
    similarities = cosine_similarity([target_user_embedding], user_embeddings)

    # Top-N pengguna mirip (kecuali dirinya sendiri)
    similar_users = similarities.argsort()[0][::-1][1:top_n + 1]

    # Produk dengan rating tinggi dari pengguna mirip
    recommended_products = set()
    for similar_user in similar_users:
        user_data = data[data['user'] == similar_user]
        high_rated_products = user_data[user_data['product_rating'] >= min_rating]['product']
        recommended_products.update(high_rated_products)

    # Hilangkan produk yang sudah pernah dibeli/nilai oleh user target
    target_user_products = set(data[data['user'] == user_id]['product'])
    recommended_products -= target_user_products

    if not recommended_products:
        print(f"Tidak ada rekomendasi yang tersedia untuk User {user_id}.")
        return

    # Ambil top_n produk (jika jumlahnya lebih sedikit, tidak apa-apa)
    recommended_products = list(recommended_products)[:top_n]

    # Decode ke ID asli dan cari nama produk
    recommended_product_ids = product_encoder.inverse_transform(recommended_products)
    recommended_names = []
    for pid in recommended_product_ids:
        product_row = data[data['product_id'] == pid]
        if not product_row.empty:
            recommended_names.append(product_row['product_name'].iloc[0])
        else:
            recommended_names.append(f"(nama tidak ditemukan untuk ID {pid})")

    # Hasil
    print(f"User-Based Recommendations for User {user_id}:")
    for name in recommended_names:
        print(f"- {name}")

In [21]:
# Load model
from tensorflow.keras.models import load_model
model = load_model('model.h5')

# Encoding menggunakan Label Encoding
user_encoder = LabelEncoder()
product_encoder = LabelEncoder()

data['user'] = user_encoder.fit_transform(data['buyer_id'])
data['product'] = product_encoder.fit_transform(data['product_id'])

n_users = data['user'].nunique()
n_products = data['product'].nunique()

# Hitung embeddings untuk seluruh user
user_embeddings = model.predict(np.arange(n_users))

# Rekomendasi
user_recommendation(user_id=5, data=data, user_embeddings=user_embeddings, product_encoder=product_encoder, top_n=10)



[1m54/79[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 949us/step

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['user'] = user_encoder.fit_transform(data['buyer_id'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['product'] = product_encoder.fit_transform(data['product_id'])


[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
User-Based Recommendations for User 5:
- 6 Pasang Cangkir Kopi Set Motif Olympia Black / Tea Cup Saucer
- DETEKTIF CONAN SPESIAL 40
- Godox QT600IIIM High Speed Studio Flash / Godox QT600IIIM
- Yoshinozuikara - Bagai Katak dalam Tempurung 01
- Quiet !mpact: Tak Masalah Jadi Orang Introver - Sylvia Loehken
- SDI BINDER CLIP 105 N0.0227/12
- Quiet !mpact: Tak Masalah Jadi Orang Introver - Sylvia Loehken
- KOMIK LIGHT NOVEL ONE PIECE: CLOCKWORK ISLAND ADVENTURE
- Mangkok Soto Ceper Motif BLUE STAR Ukuran 7 Inch / Mangkuk Porcelain
- Detektif Conan The Movie: The Time Bomb Skyscraper
