In [1]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# re-imported before each cell executes, so edits to library source are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from sponge_bob_magic.session_handler import State

# Take the session object exposed by the project's State handler and bind it
# to `spark` (presumably a SparkSession — TODO confirm against session_handler).
spark = State().session
# Bare last expression so the notebook renders the session's repr.
spark

In [3]:
from rs_datasets import MovieLens

# Load the "1m" variant of the MovieLens dataset through rs_datasets.
data = MovieLens("1m")
# Print a short preview of the dataset's tables; the output below shows
# `ratings`, `users` and `items`.
data.info()

ratings


Unnamed: 0,user_id,item_id,relevance,timestamp
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968



users


Unnamed: 0,user_id,gender,age,occupation,zip_code
0,1,F,1,10,48067
1,2,M,56,16,70072
2,3,M,25,15,55117



items


Unnamed: 0,item_id,title,genres
0,1,Toy Story (1995),Animation|Children's|Comedy
1,2,Jumanji (1995),Adventure|Children's|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance





In [4]:
from sponge_bob_magic.data_preparator import DataPreparator

# Run the ratings table through DataPreparator to obtain `log`, the
# interaction log consumed by the splitter and metrics in later cells.
# Each key in `columns_names` maps to an identically named column, so the
# mapping is generated rather than spelled out pair by pair.
log = DataPreparator().transform(
    data=data.ratings,
    columns_names={
        column: column
        for column in ("user_id", "item_id", "relevance", "timestamp")
    },
)

In [5]:
from sponge_bob_magic.splitters import UserSplitter

# Configure the train/test split.
# NOTE(review): presumably this holds out 1 item for each of 1000 sampled
# users — the (999209, 1000) counts shown below are consistent with that;
# confirm against the UserSplitter documentation.
splitter = UserSplitter(
    # size of the held-out part
    user_test_size=1000,
    item_test_size=1,
    # sampling controls; the fixed seed keeps the split repeatable
    shuffle=True,
    seed=1234,
    # cold-start handling flags
    drop_cold_users=True,
    drop_cold_items=True,
)
train, test = splitter.split(log)

# Display the row counts of both parts as a (train, test) tuple.
train.count(), test.count()

(999209, 1000)

In [7]:
from sponge_bob_magic.metrics import HitRate, NDCG, Coverage, Unexpectedness
from sponge_bob_magic.experiment import Experiment

# Create the experiment that accumulates metric values for each model, scored
# against `test`. The dict pairs every metric object with the number 10, which
# shows up as the "@10" suffix of the columns in the results table
# (presumably the recommendation-list cutoff — TODO confirm).
# NOTE(review): Coverage and Unexpectedness are constructed from the full
# `log` rather than `train`, so they also see the held-out interactions —
# confirm this is intended.
metrics = Experiment(
    test,
    {
        NDCG(): 10,
        HitRate(): 10,
        Coverage(log): 10,
        Unexpectedness(log): 10
    }
)

30-Mar-20 14:23:23, sponge_bob_magic, DEBUG: Предварительная стадия обучения (pre-fit)
30-Mar-20 14:23:27, sponge_bob_magic, DEBUG: Основная стадия обучения (fit)


In [10]:
%%time
from sponge_bob_magic.models import PopRec

# Users to produce recommendations for: every distinct user_id in `test`.
test_users = test.select("user_id").distinct()

# Fit PopRec on the training part and predict for those users with k=10.
pop_rec_recs = PopRec().fit_predict(log=train, k=10, users=test_users)

# Register the recommendations in the experiment under the "PopRec" label.
metrics.add_result("PopRec", pop_rec_recs)

30-Mar-20 14:29:21, sponge_bob_magic, DEBUG: Предварительная стадия обучения (pre-fit)
30-Mar-20 14:29:23, sponge_bob_magic, DEBUG: Основная стадия обучения (fit)
30-Mar-20 14:29:23, sponge_bob_magic, DEBUG: Выделение дефолтных юзеров
30-Mar-20 14:40:07, sponge_bob_magic, DEBUG: Выделение дефолтных юзеров
30-Mar-20 14:40:07, sponge_bob_magic, DEBUG: Выделение дефолтных юзеров


CPU times: user 398 ms, sys: 119 ms, total: 517 ms
Wall time: 29min 42s


In [11]:
%%time
from sponge_bob_magic.models import RandomPop

# The set of users to recommend for does not depend on alpha, so it is built
# once here instead of being re-derived on every loop iteration.
test_users = test.select("user_id").distinct()

# Sweep RandomPop over several alpha values; each run is fitted on `train`,
# predicts with k=10 for the test users, and is recorded under its own label
# so every alpha appears as a separate row in the experiment results.
# NOTE(review): no seed is passed to RandomPop here — if the model samples
# randomly, these rows will differ between runs; confirm whether a seed can
# be supplied.
for alpha in [-0.5, 0.0, 0.5, 1.0, 10.0, 100.0]:
    metrics.add_result(
        f"RandomPop(alpha={alpha})",
        RandomPop(alpha).fit_predict(log=train, k=10, users=test_users),
    )

30-Mar-20 14:59:03, sponge_bob_magic, DEBUG: Предварительная стадия обучения (pre-fit)
30-Mar-20 14:59:05, sponge_bob_magic, DEBUG: Основная стадия обучения (fit)
30-Mar-20 14:59:11, sponge_bob_magic, DEBUG: Выделение дефолтных юзеров
30-Mar-20 15:01:36, sponge_bob_magic, DEBUG: Выделение дефолтных юзеров
30-Mar-20 15:01:36, sponge_bob_magic, DEBUG: Выделение дефолтных юзеров
30-Mar-20 15:02:05, sponge_bob_magic, DEBUG: Предварительная стадия обучения (pre-fit)
30-Mar-20 15:02:07, sponge_bob_magic, DEBUG: Основная стадия обучения (fit)
30-Mar-20 15:02:13, sponge_bob_magic, DEBUG: Выделение дефолтных юзеров
30-Mar-20 15:04:42, sponge_bob_magic, DEBUG: Выделение дефолтных юзеров
30-Mar-20 15:04:42, sponge_bob_magic, DEBUG: Выделение дефолтных юзеров
30-Mar-20 15:05:09, sponge_bob_magic, DEBUG: Предварительная стадия обучения (pre-fit)
30-Mar-20 15:05:11, sponge_bob_magic, DEBUG: Основная стадия обучения (fit)
30-Mar-20 15:05:24, sponge_bob_magic, DEBUG: Выделение дефолтных юзеров
30-Mar-

CPU times: user 5.94 s, sys: 1.68 s, total: 7.62 s
Wall time: 18min 1s


In [12]:
# Show the collected metrics for all models, best HitRate@10 first.
metrics.pandas_df.sort_values(
    by="HitRate@10",
    ascending=False,
)

Unnamed: 0,Coverage@10,HitRate@10,NDCG@10,Unexpectedness@10
PopRec,0.03211,0.08,0.041412,0.0088
RandomPop(alpha=0.0),0.618187,0.017,0.003498,0.9706
RandomPop(alpha=1.0),0.628171,0.014,0.006041,0.9739
RandomPop(alpha=10.0),0.660281,0.011,0.004854,0.972
RandomPop(alpha=100.0),0.801403,0.01,0.002987,0.9781
RandomPop(alpha=0.5),0.626552,0.009,0.003642,0.97
RandomPop(alpha=-0.5),0.601457,0.005,0.004515,0.971
