In [97]:
# =========================================================
# === SETUP UNTUK MENGGUNAKAN GPU DI KAGGLE (CUDF & CUML) ===
# =========================================================
!nvidia-smi
!pip install cudf-cu12 cuml-cu12 --extra-index-url=https://pypi.nvidia.com

Tue Nov  4 02:53:58 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 560.35.03              Driver Version: 560.35.03      CUDA Version: 12.6     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   39C    P8              9W /   70W |       1MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
|   1  Tesla T4                       Off |   00

In [98]:
!pip install implicit



In [99]:
pip install -U implicit

Note: you may need to restart the kernel to use updated packages.


In [100]:
pip install implicit==0.7.2

Note: you may need to restart the kernel to use updated packages.


In [101]:
# =========================================================
# === IMPORT DAN LOAD DATASET ===
# =========================================================
import os
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from implicit.bpr import BayesianPersonalizedRanking
from tqdm import tqdm

path = "/kaggle/input/train-sisrek/train.csv"
if not os.path.exists(path):
    raise FileNotFoundError(f"Dataset tidak ditemukan: {path}")

df = pd.read_csv(path)
print("Contoh data:")
print(df.head())

Contoh data:
   user_id     item_id
0        8  0002005018
1        8  074322678X
2        8  0887841740
3        8  1552041778
4        8  1567407781


In [102]:
# =========================================================
# === ENCODE USER & ITEM KE INDEX NUMERIK ===
# =========================================================
user_enc = LabelEncoder()
item_enc = LabelEncoder()

df = df.copy()
df["user_idx"] = user_enc.fit_transform(df["user_id"])
df["item_idx"] = item_enc.fit_transform(df["item_id"])

n_users = df["user_idx"].nunique()
n_items = df["item_idx"].nunique()

print(f"\nJumlah user (idx): {n_users}, jumlah item (idx): {n_items}")


Jumlah user (idx): 13876, jumlah item (idx): 123069


In [103]:
# =========================================================
# === BANGUN USER × ITEM MATRIX ===
# =========================================================
user_item_matrix = csr_matrix(
    (np.ones(len(df), dtype=np.float32), (df["user_idx"], df["item_idx"])),
    shape=(n_users, n_items)
)

print("user_item_matrix.shape =", user_item_matrix.shape)
print("nnz (jumlah interaksi) =", user_item_matrix.nnz)

user_item_matrix.shape = (13876, 123069)
nnz (jumlah interaksi) = 269764


In [104]:
# =========================================================
# === TRAINING MODEL BPR ===
# =========================================================
bpr_model = BayesianPersonalizedRanking(
    factors=128,           # lebih banyak latent factor → model lebih ekspresif
    learning_rate=0.01,    # lebih cepat belajar
    regularization=0.01,   # kurangi regularisasi supaya lebih fleksibel
    iterations=100,        # training lebih lama, lebih stabil
    random_state=42
)

print("\nMulai training BPR (orientasi normal user×item)...")
bpr_model.fit(user_item_matrix)
print("Training selesai.")


Mulai training BPR (orientasi normal user×item)...


  0%|          | 0/100 [00:00<?, ?it/s]

Training selesai.


In [105]:
# =========================================================
# === FUNGSI REKOMENDASI UNTUK USER ===
# =========================================================
def recommend_for_user(user_idx, N=10):
    """Memberikan rekomendasi item untuk user tertentu."""
    if user_idx < 0 or user_idx >= n_users:
        raise ValueError(f"user_idx {user_idx} di luar jangkauan 0..{n_users-1}")

    user_vector = user_item_matrix.tocsr()[user_idx]

    ids, scores = bpr_model.recommend(
        userid=user_idx,
        user_items=user_vector,
        N=N,
        filter_already_liked_items=True
    )

    orig_item_ids = item_enc.inverse_transform([int(i) for i in ids])
    return pd.DataFrame({
        "item_idx": ids.astype(int),
        "item_id": orig_item_ids,
        "score": scores
    })

In [106]:

# =========================================================
# === CONTOH REKOMENDASI UNTUK USER TERTENTU ===
# =========================================================
sample_user = 587
print(f"\n--- Rekomendasi untuk user_idx = {sample_user} ---")
recs = recommend_for_user(sample_user, N=10)
print(recs)


--- Rekomendasi untuk user_idx = 587 ---
   item_idx     item_id     score
0     22772  0316693006  1.859326
1     39359  0385510438  1.848833
2     22714  0316603570  1.833809
3     43171  0399150897  1.833482
4     39360  0385511612  1.775807
5     50948  0446527785  1.744008
6     61482  0525947299  1.720963
7     51041  0446531332  1.648251
8     22711  0316602906  1.613187
9     22773  0316693200  1.596377


In [107]:
# =========================================================
# === SPLIT DATA MENJADI TRAIN DAN TEST ===
# =========================================================
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

train_matrix = csr_matrix(
    (np.ones(len(train_df), dtype=np.float32), (train_df["user_idx"], train_df["item_idx"])),
    shape=(n_users, n_items)
)

test_matrix = csr_matrix(
    (np.ones(len(test_df), dtype=np.float32), (test_df["user_idx"], test_df["item_idx"])),
    shape=(n_users, n_items)
)

In [108]:
from sklearn.model_selection import train_test_split

# =========================================================
# === SPLIT DATAFRAME JADI TRAIN DAN TEST ===
# =========================================================
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

# Konversi ke matriks sparse
train_matrix = csr_matrix(
    (np.ones(len(train_df), dtype=np.float32), (train_df["user_idx"], train_df["item_idx"])),
    shape=(n_users, n_items)
)

test_matrix = csr_matrix(
    (np.ones(len(test_df), dtype=np.float32), (test_df["user_idx"], test_df["item_idx"])),
    shape=(n_users, n_items)
)

In [109]:
# =========================================================
# === FUNGSI EVALUASI: MAP@K ===
# =========================================================
def average_precision_at_k(actual, predicted, k=10):
    """Hitung average precision at k untuk satu user."""
    if len(predicted) > k:
        predicted = predicted[:k]

    score = 0.0
    num_hits = 0.0

    for i, p in enumerate(predicted):
        if p in actual:
            num_hits += 1.0
            score += num_hits / (i + 1.0)

    if len(actual) == 0:
        return 0.0

    return score / min(len(actual), k)


def mean_average_precision(model, train_matrix, test_matrix, k=10):
    """Hitung mean average precision untuk seluruh user."""
    aps = []

    for user_idx in tqdm(range(train_matrix.shape[0])):
        recommended, _ = model.recommend(
            userid=user_idx,
            user_items=train_matrix[user_idx],
            N=k,
            filter_already_liked_items=True
        )

        actual = test_matrix[user_idx].indices
        if len(actual) == 0:
            continue

        ap = average_precision_at_k(actual, recommended, k)
        aps.append(ap)

    return np.mean(aps) if len(aps) > 0 else 0.0

In [110]:
# =========================================================
# === HITUNG MAP@10 ===
# =========================================================
map_score = mean_average_precision(bpr_model, train_matrix, test_matrix, k=10)
print(f"\nMean Average Precision @10: {map_score:.4f}")

100%|██████████| 13876/13876 [00:42<00:00, 327.04it/s]


Mean Average Precision @10: 0.0485



