In [1]:
!pip install lightfm
import pandas as pd
import numpy as np
import pickle
from scipy.sparse import csr_matrix
from scipy.sparse.linalg import svds
from sklearn.model_selection import train_test_split
import random
from tqdm.notebook import tqdm
import json
from lightfm.data import Dataset
from lightfm.evaluation import auc_score, precision_at_k, recall_at_k, reciprocal_rank
from lightfm.cross_validation import random_train_test_split
from lightfm import LightFM
import dask.dataframe as dd


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting lightfm
  Downloading lightfm-1.16.tar.gz (310 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m310.1/310.1 KB[0m [31m7.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: lightfm
  Building wheel for lightfm (setup.py) ... [?25l[?25hdone
  Created wheel for lightfm: filename=lightfm-1.16-cp38-cp38-linux_x86_64.whl size=899847 sha256=bd3ce0bcb427833b99d897df0873b2f4105bf1eac4bca2e457256c2b091faa9d
  Stored in directory: /root/.cache/pip/wheels/ec/bb/51/9c487d021c1373b691d13cadca0b65b6852627b1f3f43550fa
Successfully built lightfm
Installing collected packages: lightfm
Successfully installed lightfm-1.16


In [2]:
from google.colab import drive

drive.mount('/content/gdrive/', force_remount=True)

Mounted at /content/gdrive/


In [5]:
small_df = pd.read_csv('/content/gdrive/MyDrive/Colab Notebooks/rec sys/2000x3000.csv')
small_df = small_df.drop(columns=['rating'])

In [10]:
dataset = Dataset()
%time
dataset.fit(small_df.user_id, small_df.track_id)

CPU times: user 16 µs, sys: 0 ns, total: 16 µs
Wall time: 28.6 µs


In [11]:
(interactions, weights) = dataset.build_interactions(small_df.itertuples(False, None))

In [12]:
num_users, num_items = dataset.interactions_shape()
print('Num users: {}, Num items {}.'.format(num_users, num_items))

Num users: 1939, Num items 3000.


In [13]:
(train_interactions, test_interactions) = random_train_test_split(
    interactions,
    test_percentage=0.2,
    random_state=42,
)

(train_weights, test_weights) = random_train_test_split(
    weights,
    test_percentage=0.2,
    random_state=42,
)

In [22]:
results = []
max_pr_iter = -1
max_pr = 0
max_rc_iter = -1
max_rc = 0

for i in range(20):
  parameters = {
            "no_components": np.random.randint(16, 64),
            "learning_schedule": np.random.choice(["adagrad", "adadelta"]),
            "loss": np.random.choice(["bpr", "warp"]),
            "learning_rate": np.random.exponential(0.05),
            "item_alpha": np.random.exponential(1e-8),
            "user_alpha": np.random.exponential(1e-8),
            "max_sampled": np.random.randint(5, 15),
            "num_epochs": np.random.randint(5, 50),
        }
  num_epochs = parameters.pop("num_epochs")
  model = LightFM(**parameters)
  model.fit(train_interactions, 
            sample_weight=train_weights, 
            epochs=num_epochs,
            num_threads=12,
            verbose=True)
  
  parameters['num_epochs'] = num_epochs
  recall_at_10 = recall_at_k(
    model,
    test_interactions,
    train_interactions,
    k=10,
    num_threads=12,
    ).mean()
  precision_at_10 = precision_at_k(
    model,
    test_interactions,
    train_interactions,
    k=10,
    num_threads=12,
    ).mean()
  if precision_at_10 > max_pr:
    max_pr = precision_at_10
    max_pr_iter = i
    print(f'precision on {i}: {precision_at_10}')
  if recall_at_10 > max_rc:
    max_rc = recall_at_10
    max_rc_iter = i
    print(f'recall on {i}: {recall_at_10}')
  parameters['precision_at_10'] = precision_at_10
  parameters['recall_at_10'] = recall_at_10
  results.append(parameters)

Epoch: 100%|██████████| 11/11 [00:12<00:00,  1.17s/it]


precision on 0: 0.3359854221343994
recall on 0: 0.07228603661501405


Epoch: 100%|██████████| 31/31 [00:13<00:00,  2.29it/s]
Epoch: 100%|██████████| 28/28 [00:19<00:00,  1.40it/s]
Epoch: 100%|██████████| 48/48 [00:33<00:00,  1.42it/s]


precision on 3: 0.44128963351249695
recall on 3: 0.11131614566973143


Epoch: 100%|██████████| 16/16 [00:06<00:00,  2.34it/s]
Epoch: 100%|██████████| 41/41 [00:31<00:00,  1.30it/s]
Epoch: 100%|██████████| 15/15 [00:10<00:00,  1.40it/s]
Epoch: 100%|██████████| 19/19 [00:20<00:00,  1.07s/it]
Epoch: 100%|██████████| 39/39 [00:29<00:00,  1.34it/s]


precision on 8: 0.44134166836738586


Epoch: 100%|██████████| 48/48 [00:32<00:00,  1.47it/s]
Epoch: 100%|██████████| 27/27 [00:24<00:00,  1.12it/s]
Epoch: 100%|██████████| 24/24 [00:09<00:00,  2.64it/s]
Epoch: 100%|██████████| 40/40 [00:35<00:00,  1.13it/s]
Epoch: 100%|██████████| 47/47 [01:00<00:00,  1.29s/it]


precision on 13: 0.44321373105049133
recall on 13: 0.11928828267024827


Epoch: 100%|██████████| 20/20 [00:12<00:00,  1.59it/s]
Epoch: 100%|██████████| 47/47 [00:24<00:00,  1.91it/s]
Epoch: 100%|██████████| 46/46 [00:44<00:00,  1.03it/s]
Epoch: 100%|██████████| 42/42 [00:25<00:00,  1.62it/s]
Epoch: 100%|██████████| 10/10 [00:04<00:00,  2.45it/s]
Epoch: 100%|██████████| 13/13 [00:13<00:00,  1.01s/it]


In [23]:
results[13]

{'no_components': 58,
 'learning_schedule': 'adadelta',
 'loss': 'warp',
 'learning_rate': 0.0035804710258826042,
 'item_alpha': 3.801716649932694e-08,
 'user_alpha': 3.3596022700403683e-09,
 'max_sampled': 14,
 'num_epochs': 47,
 'precision_at_10': 0.44321373,
 'recall_at_10': 0.11928828267024827}

In [24]:
results[3]

{'no_components': 25,
 'learning_schedule': 'adadelta',
 'loss': 'warp',
 'learning_rate': 0.03462561685987776,
 'item_alpha': 4.974513184134289e-09,
 'user_alpha': 7.970484330607579e-09,
 'max_sampled': 14,
 'num_epochs': 48,
 'precision_at_10': 0.44128963,
 'recall_at_10': 0.11131614566973143}