In [1]:
%pip --quiet install lightfm


Note: you may need to restart the kernel to use updated packages.


In [2]:
from lightfm.data import Dataset
import pandas as pd

dataset = Dataset()

books = pd.read_csv('Books.csv')
books = books.astype(str)

users = pd.read_csv('Users.csv')
users['Age'] = users['Age'].fillna(users['Age'].median())
users = users.astype(str)

ratings = pd.read_csv('Ratings.csv')
ratings = ratings.astype(str)
ratings['Book-Rating'] = ratings['Book-Rating'].astype(int)



  books = pd.read_csv('Books.csv')


In [3]:
print(users.head())
print(books.head())
print(ratings.head())

  User-ID                            Location   Age
0       1                  nyc, new york, usa  32.0
1       2           stockton, california, usa  18.0
2       3     moscow, yukon territory, russia  32.0
3       4           porto, v.n.gaia, portugal  17.0
4       5  farnborough, hants, united kingdom  32.0
         ISBN                                         Book-Title  \
0  0195153448                                Classical Mythology   
1  0002005018                                       Clara Callan   
2  0060973129                               Decision in Normandy   
3  0374157065  Flu: The Story of the Great Influenza Pandemic...   
4  0393045218                             The Mummies of Urumchi   

            Book-Author Year-Of-Publication                   Publisher  \
0    Mark P. O. Morford                2002     Oxford University Press   
1  Richard Bruce Wright                2001       HarperFlamingo Canada   
2          Carlo D'Este                1991           

In [4]:
users=users[users['User-ID'].isin(ratings['User-ID'])] # ValueError: user id 1 not in user id mappings. (if user in users but not in ratings)
books=books[books['ISBN'].isin(ratings['ISBN'])] # ValueError: item id 044990671X not in item id mappings. (if book is in books but not in ratings)

book_features = [
    (item_id, [f'{title}', f'{author}', f'{year_of_publication}', f'{publisher}'])
    for item_id, title, author, year_of_publication, publisher in zip(books['ISBN'], books['Book-Title'], books['Book-Author'], books['Year-Of-Publication'], books['Publisher'])
]

user_features = [
    (user_id, [f'{location}', f'{age}'])
    for user_id, location, age in zip(users['User-ID'], users['Location'], users['Age'])
]

In [5]:
book_features[:5]

[('0195153448',
  ['Classical Mythology',
   'Mark P. O. Morford',
   '2002',
   'Oxford University Press']),
 ('0002005018',
  ['Clara Callan', 'Richard Bruce Wright', '2001', 'HarperFlamingo Canada']),
 ('0060973129',
  ['Decision in Normandy', "Carlo D'Este", '1991', 'HarperPerennial']),
 ('0374157065',
  ['Flu: The Story of the Great Influenza Pandemic of 1918 and the Search for the Virus That Caused It',
   'Gina Bari Kolata',
   '1999',
   'Farrar Straus Giroux']),
 ('0393045218',
  ['The Mummies of Urumchi',
   'E. J. W. Barber',
   '1999',
   'W. W. Norton &amp; Company'])]

In [6]:
user_features[:5]

[('2', ['stockton, california, usa', '18.0']),
 ('7', ['washington, dc, usa', '32.0']),
 ('8', ['timmins, ontario, canada', '32.0']),
 ('9', ['germantown, tennessee, usa', '32.0']),
 ('10', ['albacete, wisconsin, spain', '26.0'])]

In [7]:


dataset.fit(
    users = ratings["User-ID"].unique(),
    items = ratings["ISBN"].unique(),
    user_features=users[["Location","Age"]].values.flatten().tolist(),
    item_features=books[["Book-Title","Book-Author","Year-Of-Publication","Publisher"]].values.flatten().tolist()
)

ratings_tuples = list(zip(ratings["User-ID"], ratings["ISBN"], ratings["Book-Rating"]))
(interactions, weights) = dataset.build_interactions(ratings_tuples)

user_features_list = dataset.build_user_features(user_features)
item_features_list = dataset.build_item_features(book_features)

In [8]:
user_features_list[:5]

<5x131538 sparse matrix of type '<class 'numpy.float32'>'
	with 15 stored elements in Compressed Sparse Row format>

In [9]:
item_features_list[:5]

<5x699042 sparse matrix of type '<class 'numpy.float32'>'
	with 25 stored elements in Compressed Sparse Row format>

In [10]:
from lightfm import LightFM

model = LightFM(loss="warp")
model.fit(
    interactions=interactions,
    sample_weight=weights,
    user_features=user_features_list,
    item_features=item_features_list,
    verbose=True,
    epochs=30,
    num_threads=2,
)

Epoch: 100%|██████████| 30/30 [02:13<00:00,  4.44s/it]


<lightfm.lightfm.LightFM at 0x7f6d83cb24d0>

In [11]:
# from lightfm.evaluation import precision_at_k
# precision = precision_at_k(model, interactions, k=5, num_threads=2, 
#     user_features=user_features_list,
#     item_features=item_features_list,
# )

# print('Precision: ', precision.mean())

In [35]:

# predict(user_ids, item_ids, item_features=None, user_features=None, num_threads=1)
import numpy as np
book_ids = np.arange(len(ratings["ISBN"].unique()))
user_ids = np.arange(len(ratings["User-ID"].unique()))

# predict for user-id 20
pred = model.predict(
    user_ids=np.repeat(20, len(book_ids)),
    item_ids=book_ids,
    user_features=user_features_list,
    item_features=item_features_list,
    num_threads=2
)
print(pred)

max_index = np.argmax(pred)
ISBN = ratings["ISBN"].unique()[max_index]
print("Top Recommendation is " + books[books['ISBN']==ISBN]['Book-Title'])

[-14.785079 -15.492586 -14.456781 ... -16.723995 -16.38905  -16.219461]
26    Top Recommendation is Wild Animus
Name: Book-Title, dtype: object
