## Quick Start
* [Quick Start](https://making.lyst.com/lightfm/docs/quickstart.html)
* [Examples](https://making.lyst.com/lightfm/docs/examples.html)

In [38]:
import numpy as np
from lightfm.datasets import fetch_movielens
from lightfm import LightFM
from lightfm.evaluation import precision_at_k, recall_at_k

In [39]:
data = fetch_movielens(min_rating=3.5)

In [40]:
print(repr(data['train']))
print(repr(data['test']))
print(repr(data['item_features']))

<943x1682 sparse matrix of type '<class 'numpy.int32'>'
	with 49906 stored elements in COOrdinate format>
<943x1682 sparse matrix of type '<class 'numpy.int32'>'
	with 5469 stored elements in COOrdinate format>
<1682x1682 sparse matrix of type '<class 'numpy.float32'>'
	with 1682 stored elements in Compressed Sparse Row format>


In [41]:
model = LightFM(loss='bpr')
%time model.fit(data['train'], epochs=30, num_threads=10)

CPU times: user 4.11 s, sys: 0 ns, total: 4.11 s
Wall time: 414 ms


<lightfm.lightfm.LightFM at 0x7fca25355dc0>

In [44]:
train_precision = precision_at_k(model, data['train'], k=5).mean()
train_recall = recall_at_k(model, data['train'], k=10).mean()
test_precision = precision_at_k(model, data['test'], k=5).mean()
test_recall = recall_at_k(model, data['test'], k=10).mean()
print(train_precision)
print(test_precision)

print('train_recall: ', train_recall)
print('test recall: ', test_recall)

0.56985146
0.063383296
train_recall:  0.18168954176018998
test recall:  0.10359437136739064


## Building Dataset
* [Building Dataset](https://making.lyst.com/lightfm/docs/examples/dataset.html)
* [Dataset](https://making.lyst.com/lightfm/docs/lightfm.data.html)

In [23]:
import os
import zipfile
import csv
import requests

def _download(url: str, dest_path: str):
    req = requests.get(url, stream=True)
    req.raise_for_status()
    with open(dest_path, "wb") as fd:
        for chunk in req.iter_content(chunk_size=2 ** 20):
            fd.write(chunk)

def get_data():
    ratings_url = ("http://www2.informatik.uni-freiburg.de/" "~cziegler/BX/BX-CSV-Dump.zip")
    if not os.path.exists("data"):
        os.makedirs("data")
        _download(ratings_url, "data/data.zip")
    with zipfile.ZipFile("data/data.zip") as archive:
        return (
            csv.DictReader(
                (x.decode("utf-8", "ignore") for x in archive.open("BX-Book-Ratings.csv")),
                delimiter=";",
            ),
            csv.DictReader(
                (x.decode("utf-8", "ignore") for x in archive.open("BX-Books.csv")), delimiter=";"
            ),
        )

def get_ratings():
    return get_data()[0]

def get_book_features():
    return get_data()[1]

In [24]:
import json
from itertools import islice
ratings, book_features = get_data()

In [25]:
for row in islice(ratings, 2):
    print(json.dumps(row, indent=4))

print('-'* 10)

for row in islice(book_features, 1):
    print(json.dumps(row, indent=4))

{
    "User-ID": "276725",
    "ISBN": "034545104X",
    "Book-Rating": "0"
}
{
    "User-ID": "276726",
    "ISBN": "0155061224",
    "Book-Rating": "5"
}
----------
{
    "ISBN": "0195153448",
    "Book-Title": "Classical Mythology",
    "Book-Author": "Mark P. O. Morford",
    "Year-Of-Publication": "2002",
    "Publisher": "Oxford University Press",
    "Image-URL-S": "http://images.amazon.com/images/P/0195153448.01.THUMBZZZ.jpg",
    "Image-URL-M": "http://images.amazon.com/images/P/0195153448.01.MZZZZZZZ.jpg",
    "Image-URL-L": "http://images.amazon.com/images/P/0195153448.01.LZZZZZZZ.jpg"
}


In [26]:
from lightfm.data import Dataset
data = Dataset()
data.fit((row['User-ID'] for row in get_ratings()), (row['ISBN'] for row in get_ratings()))

In [10]:
print("num of user {}, items {}".format(*data.interactions_shape()))

num of user 105283, items 340553


In [31]:
from collections import Counter
all_features = [x['Book-Author'] for x in get_book_features()]
len(Counter(all_features).keys())

102043

In [32]:
data.fit_partial(items=(x['ISBN'] for x in get_book_features()),
                    item_features=all_features)
data.item_features_shape()
# 443805 - 341762 = 102043

(341762, 443805)

### Build interactions

In [16]:
interactions, weight = data.build_interactions(((x['User-ID'], x['ISBN'], int(x['Book-Rating'])) for x in get_ratings()))
print(repr(interactions))

<105283x341762 sparse matrix of type '<class 'numpy.int32'>'
	with 1149780 stored elements in COOrdinate format>


In [17]:
item_features = data.build_item_features(((x['ISBN'], [x['Book-Author']])
                                              for x in get_book_features()))
print(repr(item_features))

<341762x443805 sparse matrix of type '<class 'numpy.float32'>'
	with 613141 stored elements in Compressed Sparse Row format>


In [19]:
model = LightFM()
model.fit(interactions, item_features=item_features)

<lightfm.lightfm.LightFM at 0x7fca2534f460>