### Comments
 - Understand generator expressions

In [None]:
import os
import zipfile
import csv

In [7]:
import requests

In [8]:
def _download(url: str, dest_path: str) -> None:
    """Stream the resource at ``url`` into the file ``dest_path``.

    The body is fetched in 1 MiB chunks so the whole payload is never held in
    memory. Data is first written to ``dest_path + ".part"`` and only moved to
    ``dest_path`` once the transfer has finished, so an interrupted download
    never leaves a truncated file under the final name (a leftover ``.part``
    file is simply overwritten by the next attempt).

    Args:
        url: Address to fetch with an HTTP GET request.
        dest_path: Path of the file to create or overwrite.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    partial_path = dest_path + ".part"

    # Using the response as a context manager releases the connection even if
    # the status check or a write fails part-way through.
    with requests.get(url, stream = True) as req:
        req.raise_for_status()

        with open(partial_path, "wb") as fd:
            for chunk in req.iter_content(chunk_size = 2 ** 20):
                fd.write(chunk)

    # Publish the finished file under its final name in one step.
    os.replace(partial_path, dest_path)

In [9]:
def get_data():
    """Return CSV readers for the BX ratings and book metadata files.

    The zip dump is downloaded to ``data/data.zip`` when that file is not
    present yet; later calls reuse the file on disk.

    Returns:
        A ``(ratings, books)`` tuple of ``csv.DictReader`` objects reading
        ``BX-Book-Ratings.csv`` and ``BX-Books.csv`` (``;``-delimited) from
        the archive. Lines are decoded as UTF-8 with undecodable bytes
        dropped. Both readers are lazy and can be iterated only once.
    """
    ratings_url = ("http://www2.informatik.uni-freiburg.de/" "~cziegler/BX/BX-CSV-Dump.zip")
    archive_path = "data/data.zip"

    # Key the download on the archive itself rather than on the directory: a
    # "data" directory that already exists without the zip must still trigger
    # the download instead of failing when the archive is opened.
    os.makedirs("data", exist_ok=True)
    if not os.path.exists(archive_path):
        _download(ratings_url, archive_path)

    with zipfile.ZipFile(archive_path) as archive:
        # The outermost iterable of a generator expression is evaluated when
        # the expression is created, so both archive.open() calls run here,
        # inside the `with` block; only the per-line decoding is deferred
        # until the caller iterates over the readers.
        return (
            csv.DictReader(
                (raw.decode("utf-8", "ignore") for raw in archive.open("BX-Book-Ratings.csv")),
                delimiter=";",
            ),
            csv.DictReader(
                (raw.decode("utf-8", "ignore") for raw in archive.open("BX-Books.csv")),
                delimiter=";",
            ),
        )
    
def get_ratings():
    """Return the first element of ``get_data()``: the ratings reader.

    ``get_data`` is invoked on every call, so each caller gets its own reader.
    """
    readers = get_data()
    return readers[0]

def get_book_features():
    """Return the second element of ``get_data()``: the book metadata reader.

    ``get_data`` is invoked on every call, so each caller gets its own reader.
    """
    readers = get_data()
    return readers[1]

In [10]:
import json
from itertools import islice

In [11]:
# Both names are bound to single-pass csv.DictReader iterators: rows consumed
# by one cell are gone for the next. Call get_data() again for a fresh pass.
ratings, book_features = get_data()

In [12]:
# Preview the first three rating rows. A fresh reader is requested here rather
# than slicing a shared one-shot iterator, so re-running this cell always
# prints the same three rows instead of the next three.
for line in islice(get_ratings(), 3):
    print(json.dumps(line, indent = 4 ))

{
    "User-ID": "276725",
    "ISBN": "034545104X",
    "Book-Rating": "0"
}
{
    "User-ID": "276726",
    "ISBN": "0155061224",
    "Book-Rating": "5"
}
{
    "User-ID": "276727",
    "ISBN": "0446520802",
    "Book-Rating": "0"
}


In [45]:
# Show the single book row at zero-based index 10. A fresh reader is requested
# here rather than slicing a shared one-shot iterator, so the row printed does
# not depend on how many rows earlier executions already consumed.
for line in islice(get_book_features(), 10 ,11):
    print(json.dumps(line, indent = 4))

{
    "ISBN": "067176537X",
    "Book-Title": "The Therapeutic Touch: How to Use Your Hands to Help or to Heal",
    "Book-Author": "Dolores Krieger",
    "Year-Of-Publication": "1979",
    "Publisher": "Fireside",
    "Image-URL-S": "http://images.amazon.com/images/P/067176537X.01.THUMBZZZ.jpg",
    "Image-URL-M": "http://images.amazon.com/images/P/067176537X.01.MZZZZZZZ.jpg",
    "Image-URL-L": "http://images.amazon.com/images/P/067176537X.01.LZZZZZZZ.jpg"
}


### Dataset ID mappings

In [36]:
from lightfm.data import Dataset

# Fit the dataset on every user id and every ISBN found in the ratings file.
# Each generator gets its own reader because a reader can be walked only once.
dataset = Dataset()
dataset.fit(
    (rating['User-ID'] for rating in get_ratings()),
    (rating['ISBN'] for rating in get_ratings()),
)

In [37]:
# Report the user and item counts given by the dataset's interactions shape
# after the initial fit.
num_users, num_items = dataset.interactions_shape()
print('Num users %d and num Items %d' % (num_users,num_items))

Num users 105283 and num Items 340553


In [38]:
# Extend the fitted dataset with every ISBN from the book metadata and pass
# each book's author string as an item feature. Each generator gets its own
# reader because a reader can be walked only once.
dataset.fit_partial(
    items=(book['ISBN'] for book in get_book_features()),
    item_features=(book['Book-Author'] for book in get_book_features()),
)

In [39]:
# Note that we have fitted some more item ids: this makes sure our mappings are
# complete even if the features dataset contains items that never appear in
# the interactions set.
num_users, num_items = dataset.interactions_shape()
print('Num users %d and num Items %d' % (num_users,num_items))

Num users 105283 and num Items 341762


### Interactions matrix

In [40]:
# Build the interaction and weight matrices from (user id, ISBN) pairs taken
# from a fresh pass over the ratings file; only the pair is passed, not the
# rating value.
interactions, weights = dataset.build_interactions(
    (rating['User-ID'], rating['ISBN']) for rating in get_ratings()
)
print(repr(interactions))

<105283x341762 sparse matrix of type '<class 'numpy.int32'>'
	with 1149780 stored elements in COOrdinate format>


In [42]:
# Build the item feature matrix from (ISBN, [author]) pairs: each book
# contributes a one-element feature list holding its author string.
item_features = dataset.build_item_features(
    (book['ISBN'], [book['Book-Author']]) for book in get_book_features()
)
print(repr(item_features))

<341762x443805 sparse matrix of type '<class 'numpy.float32'>'
	with 613141 stored elements in Compressed Sparse Row format>
