<font color="#CC3D3D"><p>
# RecSys Competition Starter Notebook #1
### Model Comparison

### Setup

In [1]:
import pandas as pd
import numpy as np
from tqdm.auto import tqdm 
# Cornac API 
import cornac
from cornac.eval_methods import BaseMethod, RatioSplit, StratifiedSplit, CrossValidation
from cornac.models import NeuMF, BiVAECF, EASE, UserKNN, ItemKNN, MF, WMF
from cornac.metrics import Precision, Recall, NDCG, AUC, MAP
from cornac.hyperopt import GridSearch, RandomSearch, Discrete, Continuous

In [2]:
# Column names of the interaction table (user, item, rating)
DEFAULT_USER_COL, DEFAULT_ITEM_COL, DEFAULT_RATING_COL = 'user_id', 'item_id', 'rating'

# Length of the recommendation list used by the ranking metrics
TOP_K = 5

# Shared random seed and verbosity switch passed to the models and the split
SEED, VERBOSE = 202511, True

### Data Preparation

In [3]:
# Load the interaction log and attach a constant rating of 1 to every row,
# so the frame carries the (User, Item, Rating) triple that Cornac's UIR format uses.
data = pd.read_csv('comp_train.csv').assign(**{DEFAULT_RATING_COL: 1})
data

Unnamed: 0,user_id,item_id,rating
0,U16127,P00244,1
1,U28891,P01918,1
2,U01934,P02258,1
3,U22546,P01790,1
4,U15740,P00801,1
...,...,...,...
59007,U01786,P01837,1
59008,U07976,P02687,1
59009,U20688,P01698,1
59010,U05402,P00474,1


<font color="blue"><p>
### Model Configuration

In [4]:
# Registry keyed by model name: models[name][0] is the model object, models[name][1] its parameter dict
models = dict()

##### User K-Nearest-Neighbors (UserKNN)

In [5]:
# User-to-user collaborative filtering (UserKNN) hyperparameters
params = dict(
    k=20,
    similarity='cosine',  # alternatives noted by the author: 'cosine', 'pearson'
    amplify=1.0,
)

# Build the model and register it together with the parameters it was built from
model = UserKNN(seed=SEED, verbose=VERBOSE, **params)
models[model.name] = (model, params)

##### Weighted Matrix Factorization (WMF)

In [6]:
# NOTE: WMF only runs when TensorFlow is installed.
# Weighted Matrix Factorization hyperparameters
params = dict(
    k=200,
    max_iter=100,
    learning_rate=0.001,
    lambda_u=0.01,
    lambda_v=0.01,
    a=1,
    b=0.01,
    batch_size=128,
)

# Build the model and register it together with the parameters it was built from
model = WMF(seed=SEED, verbose=VERBOSE, **params)
models[model.name] = (model, params)

##### Embarrassingly Shallow Autoencoders for Sparse Data (EASE)

In [7]:
# EASE hyperparameters
params = dict(lamb=500, posB=True)

# Build the model and register it together with the parameters it was built from
model = EASE(seed=SEED, verbose=VERBOSE, **params)
models[model.name] = (model, params)

### Experiment & Evaluation

In [8]:
# Experiment sample: draw 50% of the users and keep every interaction of each drawn user.
sampled_users = (
    data[DEFAULT_USER_COL]
    .drop_duplicates()
    .sample(frac=0.5, random_state=SEED)
)
sample_data = data[data[DEFAULT_USER_COL].isin(sampled_users)]

# fmt='UIR' consumes each row positionally as (user, item, rating), so pick the
# three columns explicitly in that order instead of relying on the CSV's column
# layout (an extra or reordered column would otherwise be misread).
uir_data = np.array(
    sample_data[[DEFAULT_USER_COL, DEFAULT_ITEM_COL, DEFAULT_RATING_COL]]
)

# Evaluation method: stratified split (train/test split performed per user)
eval_method = StratifiedSplit(
    data=uir_data,
    fmt='UIR',
    group_by="user",
    test_size=0.2,   # held out for the final evaluation
    exclude_unknowns=True,
    seed=SEED,
    verbose=VERBOSE
)

# Evaluation metric(s)
metrics = [Recall(k=TOP_K)]

rating_threshold = 1.0
exclude_unknowns = True
---
Training data:
Number of users = 4022
Number of items = 2789
Number of ratings = 22339
Max rating = 1.0
Min rating = 1.0
Global mean = 1.0
---
Test data:
Number of users = 4022
Number of items = 2789
Number of ratings = 7224
Number of unknown users = 0
Number of unknown items = 0
---
Total users = 4022
Total items = 2789


In [9]:
%%time

# 실험 수행
try:
    cornac.Experiment(
        eval_method=eval_method,
        models=[m[0] for m in models.values()],
        metrics=metrics,
        user_based=False,
        show_validation=False,
    ).run()
except UnicodeEncodeError:
    pass


[UserKNN] Training started!


  0%|          | 0/4022 [00:00<?, ?it/s]


[UserKNN] Evaluation started!


Ranking:   0%|          | 0/3978 [00:00<?, ?it/s]


[WMF] Training started!



  0%|          | 0/100 [00:00<?, ?it/s]

Learning completed!

[WMF] Evaluation started!


Ranking:   0%|          | 0/3978 [00:00<?, ?it/s]


[EASEᴿ] Training started!

[EASEᴿ] Evaluation started!


Ranking:   0%|          | 0/3978 [00:00<?, ?it/s]


TEST:
...
        | Recall@5 | Train (s) | Test (s)
------- + -------- + --------- + --------
UserKNN |   0.0702 |    0.4663 |  30.7684
WMF     |   0.0159 |   50.5390 |   2.8094
EASEᴿ   |   0.0712 |    0.5956 |   2.5720

CPU times: total: 3min 28s
Wall time: 1min 27s


<font color="#CC3D3D"><p>
# End