In [14]:
!pip install implicit
# Make sure that you have all requirements installed before starting, 
# highly recommend to go to the implicit repo to see all machine requirements. 
import pandas as pd
import json
import requests
import implicit
from scipy import sparse
pd.options.mode.chained_assignment = None


## Our dataset:
To get this dataset please go to: 
https://www.kaggle.com/zygmunt/goodbooks-10k#books.csv



book -- Una tabla de libros con toda su informacion

In [16]:
book = pd.read_csv('books.csv')
book.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 23 columns):
id                           10000 non-null int64
book_id                      10000 non-null int64
best_book_id                 10000 non-null int64
work_id                      10000 non-null int64
books_count                  10000 non-null int64
isbn                         9300 non-null object
isbn13                       9415 non-null float64
authors                      10000 non-null object
original_publication_year    9979 non-null float64
original_title               9415 non-null object
title                        10000 non-null object
language_code                8916 non-null object
average_rating               10000 non-null float64
ratings_count                10000 non-null int64
work_ratings_count           10000 non-null int64
work_text_reviews_count      10000 non-null int64
ratings_1                    10000 non-null int64
ratings_2                    10000 n

In [20]:
ratings = pd.read_csv('ratings.csv')
ratings = ratings[ratings['rating'] >= 4].dropna()
ratings.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 650327 entries, 0 to 981754
Data columns (total 3 columns):
book_id    650327 non-null int64
user_id    650327 non-null int64
rating     650327 non-null int64
dtypes: int64(3)
memory usage: 19.8 MB


to_read -- Una tabla con usuarios y cada libro que esta en su lista de leer.

In [40]:
to_read = pd.read_csv('to_read.csv')
to_read.head()

Unnamed: 0,user_id,book_id
0,1,112
1,1,235
2,1,533
3,1,1198
4,1,1874


In [21]:
train = book.merge(ratings, on='book_id')
train = train.drop_duplicates(['user_id','book_id'],keep='first')
train['score'] = train.rating.mod(4) + 1
train = train[['book_id','user_id','score']]
train.head()

Unnamed: 0,book_id,user_id,score
0,3,3662,1
1,3,5115,1
2,3,5272,2
3,3,5885,1
4,3,6063,2


In [23]:
train['book_cat'] = train['book_id'].astype('category').cat.codes
train['user_cat'] = train['user_id'].astype('category').cat.codes

book_map = train.drop_duplicates(['book_id','book_cat'],keep='last')
matrix = sparse.csr_matrix((
                train.score.values,
                (train.book_cat.values,
                train.user_cat.values)),
                shape=(len(train['book_cat'].unique()), len(train['user_cat'].unique())))
item_matrix = matrix.T.tocsr()
matrix

<812x23230 sparse matrix of type '<class 'numpy.int64'>'
	with 52946 stored elements in Compressed Sparse Row format>

In [38]:
model = implicit.als.AlternatingLeastSquares(factors=800,iterations=1,regularization=0.01)
model.fit(matrix)

100%|████████████████████████████████████████████████████████████████████████████████| 14.0/14 [00:00<00:00, 19.37it/s]


In [39]:
user_cat = 20
recommendation = model.recommend(user_cat,item_matrix,100,filter_already_liked_items=True)
recommendation

[(450, 0.054436143),
 (400, 0.045950767),
 (769, 0.04551361),
 (671, 0.042196408),
 (461, 0.03656152),
 (385, 0.031812377),
 (791, 0.030836033),
 (505, 0.029319003),
 (602, 0.02810475),
 (238, 0.027889963),
 (162, 0.026016302),
 (217, 0.025197253),
 (228, 0.025079984),
 (136, 0.0248105),
 (287, 0.024321048),
 (595, 0.023526367),
 (159, 0.023436077),
 (507, 0.02328435),
 (259, 0.023007367),
 (235, 0.022355888),
 (732, 0.02172082),
 (195, 0.021694543),
 (800, 0.02150014),
 (279, 0.021446295),
 (560, 0.021003263),
 (236, 0.020244367),
 (718, 0.01975438),
 (242, 0.019319665),
 (297, 0.019102752),
 (257, 0.018910158),
 (803, 0.01798072),
 (295, 0.017617803),
 (659, 0.01759948),
 (641, 0.0173206),
 (762, 0.017229829),
 (687, 0.017165244),
 (436, 0.016591834),
 (200, 0.016503317),
 (273, 0.016116813),
 (600, 0.01567422),
 (455, 0.015377777),
 (500, 0.015302204),
 (356, 0.015156066),
 (258, 0.0150397215),
 (416, 0.0146467015),
 (300, 0.014630882),
 (743, 0.014601708),
 (698, 0.014298257),
 (24

In [27]:
def get_book_info(book_cat):
    book_id = book_map[book_map.book_cat == book_cat]['book_id'].values[0]
    book_isbn = book[book.book_id == book_id]['isbn'].values[0]
#     book_isbn = '0' + str(book_isbn)
#     url = 'https://www.googleapis.com/books/v1/volumes?q=isbn:'
#     response = requests.get(url + book_isbn)
#     json_response = json.loads(response.text)
#     try:
#         title = json_response['items'][0]['volumeInfo']['title']
#         author = json_response['items'][0]['volumeInfo']['authors'][0]
#         category = json_response['items'][0]['volumeInfo']['categories'][0]
#         return title, author, category
#     except Exception:
#         return book_isbn
    return book[book.isbn == book_isbn][['authors','original_title']].values

In [37]:
for rec in recommendation:
    book_rec = rec[0]
    print(get_book_info(book_rec))

[['Maud Hart Lovelace, Lois Lenski' 'Betsy-Tacy']]
[['Tom Robbins' 'Still Life with Woodpecker']]
[['Tom Standage' 'A History of the World in 6 Glasses']]
[['Max Brooks' 'World War Z: An Oral History of the Zombie War']]
[['J.R.R. Tolkien' 'The Hobbit or There and Back Again']]
[["Julia Child, Alex Prud'Homme" 'My Life in France']]
[['Roald Dahl, Quentin Blake' 'The Witches']]
[['John Grisham' 'The Firm']]
[['Raymond Chandler' 'The Big Sleep']]
[['Jhumpa Lahiri' 'Interpreter of Maladies']]
[['Louis de Bernières' "Captain Corelli's Mandolin"]]
[['Scott Turow' 'The Burden of Proof']]
[['J.K. Rowling' nan]]
[['Michael Crichton' 'Rising Sun']]
[['Douglas Coupland' 'All Families Are Psychotic']]
[['Pat Barker' 'Regeneration']]
[['Ian Fleming' 'Moonraker']]
[['Hermann Hesse, Ursule Molinaro' 'Narziß und Goldmund']]
[['Ernest Hemingway' 'The Snows of Kilimanjaro and Other Stories']]
[['John Grisham' 'The Last Juror']]
[['Beryl Markham' 'West with the Night']]
[['Åsne Seierstad, Ingrid Christo

In [32]:
books_read = train[train.user_cat == user_cat]['book_cat'].values
for b in books_read:
    print(get_book_info(b))

[['Douglas Adams, Mark Carwardine' 'Last Chance to See']]


In [35]:
similar_items = model.similar_items(12)
print(f"Libros Similares a {get_book_info(12)}\n")
for tuple in similar_items:
    similar_item = tuple[0]
    print(get_book_info(similar_item))

Libros Similares a [['Bill Bryson' 'The Lost Continent: Travels in Small-Town America']]

[['Bill Bryson' 'The Lost Continent: Travels in Small-Town America']]
[['J.K. Rowling, Mary GrandPré'
  'Harry Potter and the Order of the Phoenix']]
[['Bill Bryson' 'A Short History of Nearly Everything']]
[['J.K. Rowling, Mary GrandPré' 'Harry Potter and the Half-Blood Prince']]
[['Bill Bryson' 'Notes from a Big Country']]
[['Bill Bryson' 'Down Under']]
[['Bill Bryson' 'Neither Here Nor There: Travels in Europe']]
[['Johanna Spyri, Angelo  Rinaldi, Beverly Cleary' 'Heidi']]
[['Douglas Adams'
  "The Ultimate Hitchhiker's Guide: Five Complete Novels and One Story"]]
[['Bill Bryson' 'Notes from a Small Island']]
