In [25]:
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors

books_filename = 'BX-Books.csv'
ratings_filename = 'BX-Book-Ratings.csv'

df_books = pd.read_csv(
    books_filename,
    encoding="ISO-8859-1",
    sep=";",
    header=0,
    names=['isbn', 'title', 'author'],
    usecols=['isbn', 'title', 'author'],
    dtype={'isbn': 'str', 'title': 'str', 'author': 'str'}
)

df_ratings = pd.read_csv(
    ratings_filename,
    encoding="ISO-8859-1",
    sep=";",
    header=0,
    names=['user', 'isbn', 'rating'],
    usecols=['user', 'isbn', 'rating'],
    dtype={'user': 'int32', 'isbn': 'str', 'rating': 'float32'}
)

user_rating_counts = df_ratings['user'].value_counts()
df_ratings = df_ratings[df_ratings['user'].isin(user_rating_counts[user_rating_counts >= 200].index)]

book_rating_counts = df_ratings['isbn'].value_counts()
df_ratings = df_ratings[df_ratings['isbn'].isin(book_rating_counts[book_rating_counts >= 100].index)]

df = pd.merge(df_ratings, df_books, on='isbn')

book_pivot = df.pivot_table(index='title', columns='user', values='rating').fillna(0)
book_matrix = csr_matrix(book_pivot.values)

model_knn = NearestNeighbors(metric='cosine', algorithm='brute')
model_knn.fit(book_matrix)

def get_recommends(book=""):
    if book not in book_pivot.index:
        return [book, []]

    book_idx = np.where(book_pivot.index == book)[0][0]

    distances, indices = model_knn.kneighbors(
        book_pivot.iloc[book_idx, :].values.reshape(1, -1),
        n_neighbors=6
    )

    recommended_books = []
    for i in range(1, len(indices.flatten())):
        recommended_books.append([
            book_pivot.index[indices.flatten()[i]],
            distances.flatten()[i]
        ])

    recommended_books = sorted(recommended_books, key=lambda x: x[1], reverse=False)

    return [book, recommended_books]

print(get_recommends("The Queen of the Damned (Vampire Chronicles (Paperback))"))

def test_book_recommendation():
    test_pass = True
    recommends = get_recommends("Where the Heart Is (Oprah's Book Club (Paperback))")
    if recommends[0] != "Where the Heart Is (Oprah's Book Club (Paperback))":
        test_pass = False
    recommended_books = ["I'll Be Seeing You", 'The Weight of Water', 'The Surgeon', 'I Know This Much Is True']
    recommended_books_dist = [0.8, 0.77, 0.77, 0.77]
    for i in range(2):
        if recommends[1][i][0] not in recommended_books:
            test_pass = False
        if abs(recommends[1][i][1] - recommended_books_dist[i]) >= 0.05:
            test_pass = False
    if test_pass:
        print("You passed the challenge! 🎉🎉🎉🎉🎉")
    else:
        print("You haven't passed yet. Keep trying!")

test_book_recommendation()

['The Queen of the Damned (Vampire Chronicles (Paperback))', []]
You haven't passed yet. Keep trying!
