In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell executes and source edits are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import sys

# Location of the local rtrec checkout that the imports below rely on.
# NOTE(review): this is a machine-specific absolute path; adjust it (or install
# the package into the kernel environment) when running elsewhere.
RTREC_ROOT = "/home/td-user/rtrec"

# Guard the append so re-running this cell does not pile up duplicate entries
# on sys.path.
if RTREC_ROOT not in sys.path:
    sys.path.append(RTREC_ROOT)

In [3]:
from rtrec.experiments.datasets import load_movielens

# Load the MovieLens '1m' dataset, requesting both user and item attribute
# columns alongside the interactions.
df = load_movielens(
    dataset_scale='1m',
    load_user_attributes=True,
    load_item_attributes=True,
)

Using existing ratings.dat file.


In [4]:
from rtrec.experiments.split import temporal_user_split
# Split the interactions into train and test frames using the helper's default
# settings. Presumably a per-user chronological hold-out, judging by the helper
# name; confirm against rtrec.experiments.split.
train_df, test_df = temporal_user_split(df)

# With user/item features

In [5]:
# For every user, pick the training row with the greatest timestamp and turn
# its attribute columns into a set of "<attribute>#<value>" tags.
# Result: {user: {tag, ...}}.
user_tags = (
    train_df
    .loc[
        train_df.groupby('user')['tstamp'].idxmax(),
        ['user', 'gender', 'age', 'occupation', 'zip_code'],
    ]
    .set_index('user')
    .apply(
        lambda profile: {
            f"gender#{profile['gender']}",
            f"age#{profile['age']}",
            f"occupation#{profile['occupation']}",
            f"zipcode#{profile['zip_code']}",
        },
        axis=1,
    )
    .to_dict()
)

In [6]:
# Collect, per item, the union of "genre#<name>" tags over all of its training
# rows; each value in the genres column is split on '|'.
# Result: {item: {tag, ...}}.
item_tags = (
    train_df
    .groupby("item")["genres"]
    .apply(
        lambda genre_strings: {
            f"genre#{name}"
            for joined in genre_strings
            for name in joined.split('|')
        }
    )
    .to_dict()
)

In [7]:
from rtrec.recommender import Recommender
from rtrec.models import LightFM

# LightFM model with WARP loss, 10 components and 20 epochs; both alpha
# arguments are set to 0.
# NOTE(review): no random seed is passed here, so metrics may differ between
# runs; confirm whether the wrapper accepts one.
model = LightFM(
    no_components=10,
    loss="warp",
    epochs=20,
    item_alpha=0,
    user_alpha=0,
)
recommender = Recommender(model)

  from .autonotebook import tqdm as notebook_tqdm


In [8]:
# Fit on the training split, supplying the user and item tag dictionaries
# built above as side information.
recommender.bulk_fit(
    train_df,
    user_tags=user_tags,
    item_tags=item_tags,
)

Register user features: 100%|██████████| 6040/6040 [00:00<00:00, 622233.04it/s]
Register item features: 100%|██████████| 3667/3667 [00:00<00:00, 907726.20it/s]
Add interactions: 100%|██████████| 798/798 [00:02<00:00, 323.54it/s]
Epoch: 100%|██████████| 20/20 [00:13<00:00,  1.48it/s]

Fit completed in 16.36 seconds
Throughput: 48750.84 samples/sec





<rtrec.recommender.Recommender at 0xffff6d5f6840>

In [9]:
# Score the feature-trained recommender on the test split with lists of
# size 10. filter_interacted=True presumably excludes items the user has
# already interacted with; confirm in rtrec.
recommender.evaluate(
    test_df,
    recommend_size=10,
    filter_interacted=True,
)

100%|██████████| 61/61 [00:01<00:00, 52.52it/s]


{'precision': 0.12849337748344747,
 'recall': 0.05983215239703983,
 'f1': 0.06723581613869616,
 'ndcg': 0.14178229831504563,
 'hit_rate': 0.5821192052980132,
 'mrr': 0.2845363581414893,
 'map': 0.07076247486209367,
 'tp': 7761,
 'auc': 0.3242072361505315}

In [10]:
# Same evaluation, additionally passing the training-derived user tags.
# NOTE(review): the recorded metrics are identical to the call without
# user_tags; confirm that evaluate() actually makes use of them.
recommender.evaluate(
    test_df,
    user_tags=user_tags,
    recommend_size=10,
    filter_interacted=True,
)

100%|██████████| 61/61 [00:01<00:00, 60.40it/s]


{'precision': 0.12849337748344747,
 'recall': 0.05983215239703983,
 'f1': 0.06723581613869616,
 'ndcg': 0.14178229831504563,
 'hit_rate': 0.5821192052980132,
 'mrr': 0.2845363581414893,
 'map': 0.07076247486209367,
 'tp': 7761,
 'auc': 0.3242072361505315}

In [11]:
# Build the same "<attribute>#<value>" tag sets as for the training split, but
# from each user's row with the greatest timestamp in the test split.
latest_test_idx = test_df.groupby('user')['tstamp'].idxmax()
test_profiles = (
    test_df.loc[latest_test_idx][['user', 'gender', 'age', 'occupation', 'zip_code']]
    .set_index('user')
)
test_user_tags = test_profiles.apply(
    lambda attrs: {
        f"gender#{attrs['gender']}",
        f"age#{attrs['age']}",
        f"occupation#{attrs['occupation']}",
        f"zipcode#{attrs['zip_code']}",
    },
    axis=1,
).to_dict()

In [12]:
# Same evaluation, this time passing the user tags derived from the test split.
# NOTE(review): the recorded metrics are again identical to the call without
# user_tags; confirm that evaluate() actually makes use of them.
recommender.evaluate(
    test_df,
    user_tags=test_user_tags,
    recommend_size=10,
    filter_interacted=True,
)

100%|██████████| 61/61 [00:01<00:00, 59.73it/s]


{'precision': 0.12849337748344747,
 'recall': 0.05983215239703983,
 'f1': 0.06723581613869616,
 'ndcg': 0.14178229831504563,
 'hit_rate': 0.5821192052980132,
 'mrr': 0.2845363581414893,
 'map': 0.07076247486209367,
 'tp': 7761,
 'auc': 0.3242072361505315}

# Without user/item features

Baseline for comparison: train the same model without user/item features, then check whether the feature-based run above actually scores better than this one.

In [13]:
from rtrec.recommender import Recommender
from rtrec.models import LightFM

# Baseline: a fresh LightFM model with the same hyperparameters as the
# feature-based run. This rebinds `model` and `recommender`, so the
# feature-trained instances are no longer reachable under these names.
model = LightFM(
    no_components=10,
    loss="warp",
    epochs=20,
    item_alpha=0,
    user_alpha=0,
)
recommender = Recommender(model)

In [14]:
# Fit the baseline on the training interactions only; no user_tags or
# item_tags are passed.
recommender.bulk_fit(train_df)

Add interactions: 100%|██████████| 798/798 [00:01<00:00, 401.29it/s]
Epoch: 100%|██████████| 20/20 [00:08<00:00,  2.47it/s]

Fit completed in 10.42 seconds
Throughput: 76568.95 samples/sec





<rtrec.recommender.Recommender at 0xffff6c8d3560>

In [15]:
# Evaluate the baseline with the same settings as the feature-based run so the
# two metric dictionaries can be compared directly.
recommender.evaluate(
    test_df,
    recommend_size=10,
    filter_interacted=True,
)

100%|██████████| 61/61 [00:00<00:00, 81.17it/s]


{'precision': 0.12908940397351368,
 'recall': 0.060820811071732844,
 'f1': 0.06797055190409505,
 'ndcg': 0.14120548788972284,
 'hit_rate': 0.5817880794701987,
 'mrr': 0.2769639572164398,
 'map': 0.07048572715970149,
 'tp': 7797,
 'auc': 0.3169937913907297}