In [1]:
import pandas as pd
from collections import Counter

# 1. Read the data
# The path is relative, so "train.csv" must be in the kernel's working directory.
df = pd.read_csv("train.csv")

In [3]:
# 2. Compute each item's popularity (number of rows in which the item appears).
# value_counts() already orders items from most to least frequent; the chain only
# turns that Series into a two-column frame named item_id / popularity.
item_popularity = (
    df['item_id']
    .value_counts()
    .rename_axis('item_id')
    .reset_index(name='popularity')
)

print("Top 5 item paling populer:")
print(item_popularity.head())

Top 5 item paling populer:
      item_id  popularity
0  0316666343         427
1  0385504209         330
2  0312195516         241
3  0142001740         214
4  059035342X         206


In [5]:
# 3. Buat fungsi rekomendasi untuk user (berdasarkan popularitas)
def recommend_items_for_user(user_id, df, top_n=10, popularity=None):
    """Recommend the most popular items the user has not interacted with yet.

    Args:
        user_id: Value compared against ``df['user_id']`` to find the user's rows.
        df: Interaction table with ``user_id`` and ``item_id`` columns. It is only
            used to look up the items the user has already interacted with.
        top_n: Maximum number of items to return. Values <= 0 yield an empty list.
        popularity: Optional table with an ``item_id`` column, ordered from most
            to least popular. When omitted, the notebook-level ``item_popularity``
            table is used, so the result depends on whichever cell assigned that
            name last.

    Returns:
        A list of at most ``top_n`` item ids, in the order of the popularity table.
    """
    # DataFrame.head(n) with a negative n returns "all rows except the last |n|",
    # which would silently hand back almost the whole catalogue.
    if top_n <= 0:
        return []

    if popularity is None:
        popularity = item_popularity

    # Items the user has already interacted with
    seen_items = df.loc[df['user_id'] == user_id, 'item_id'].unique()

    # Keep only items the user has not interacted with, preserving popularity order
    is_unseen = ~popularity['item_id'].isin(seen_items)

    # The first top_n unseen rows are the recommendation
    return popularity.loc[is_unseen, 'item_id'].head(top_n).tolist()

# Example: recommendations for user_id = 1
# NOTE(review): the recorded output is identical to the global top 5, so either
# user 1 has not interacted with any of those items or the value 1 matches no row
# in df['user_id'] (e.g. ids stored as strings) -- confirm.
print("\nRekomendasi untuk user 1:")
print(recommend_items_for_user(1, df, top_n=5))


Rekomendasi untuk user 1:
['0316666343', '0385504209', '0312195516', '0142001740', '059035342X']


In [7]:
from sklearn.model_selection import train_test_split

# 1. Split the data into train and test
# Row-level 80/20 split; the fixed random_state keeps the split reproducible.
train, test = train_test_split(df, test_size=0.2, random_state=42)

# 2. Recompute popularity from the training data only
# This rebinds the global `item_popularity` that recommend_items_for_user reads,
# so recommendations made after this cell rank items by train-only counts instead
# of counts from the full dataset.
item_popularity = train['item_id'].value_counts().reset_index()
item_popularity.columns = ['item_id', 'popularity']

# 3. Fungsi MAP@K
def apk(actual, predicted, k=10):
    """Average precision at k for a single ranked list.

    Args:
        actual: Iterable of relevant item ids. Repeated ids count once; ``None``
            or an empty iterable means there is nothing to hit.
        predicted: Ranked sequence of recommended item ids, best first. Only the
            first ``k`` entries are scored, and a repeated prediction is credited
            at most once (at its first position).
        k: Cut-off rank. Values <= 0 give a score of 0.0.

    Returns:
        A float in [0, 1]: the sum of precision@rank over the ranks where a new
        relevant item appears, divided by ``min(number of distinct relevant items, k)``.
    """
    # Deduplicate: a user interacting with the same item several times must not
    # inflate the denominator, otherwise a perfect ranking scores below 1.0.
    relevant = set(actual) if actual is not None else set()
    if not relevant or k <= 0:
        return 0.0

    score = 0.0
    num_hits = 0
    already_predicted = set()
    for rank, item in enumerate(predicted[:k], start=1):
        if item in relevant and item not in already_predicted:
            num_hits += 1
            score += num_hits / rank
        already_predicted.add(item)

    return score / min(len(relevant), k)

def mapk(test_df, k=10, train_df=None):
    """Mean average precision at k of the popularity recommender.

    Args:
        test_df: Held-out interactions with ``user_id`` and ``item_id`` columns.
        k: Number of recommendations requested and scored per user.
        train_df: Interactions passed to ``recommend_items_for_user`` to determine
            what each user has already seen. Defaults to the notebook-level
            ``train`` frame.

    Returns:
        The mean of ``apk`` over the users present in ``test_df``, or 0.0 when
        there are no users to score.
    """
    # With no rows there is nothing to average; dividing by zero users would raise.
    if len(test_df) == 0:
        return 0.0
    if train_df is None:
        train_df = train

    # One groupby pass collects every user's held-out items instead of re-masking
    # the whole test frame per user. sort=False keeps first-appearance order.
    # Rows whose user_id is missing are skipped (groupby drops NaN keys by default).
    apk_scores = []
    for user, held_out_items in test_df.groupby('user_id', sort=False)['item_id']:
        predicted = recommend_items_for_user(user, train_df, top_n=k)
        apk_scores.append(apk(held_out_items.tolist(), predicted, k))

    if not apk_scores:
        return 0.0
    return sum(apk_scores) / len(apk_scores)

# 4. Compute MAP@10
# Scores the popularity baseline on the held-out `test` rows and prints the value
# rounded to four decimals.
map10 = mapk(test, k=10)
print("\nMAP@10 (Item Popularity):", round(map10, 4))


MAP@10 (Item Popularity): 0.0044
