In [1]:
# import libraries (you may add additional imports but you may not have to)
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
import matplotlib.pyplot as plt

In [2]:
# get data files
!wget https://cdn.freecodecamp.org/project-data/books/book-crossings.zip

!unzip book-crossings.zip

books_filename = 'BX-Books.csv'
ratings_filename = 'BX-Book-Ratings.csv'

--2025-11-08 09:48:04--  https://cdn.freecodecamp.org/project-data/books/book-crossings.zip
Resolving cdn.freecodecamp.org (cdn.freecodecamp.org)... 104.26.2.33, 104.26.3.33, 172.67.70.149, ...
Connecting to cdn.freecodecamp.org (cdn.freecodecamp.org)|104.26.2.33|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 26085508 (25M) [application/zip]
Saving to: ‘book-crossings.zip’


2025-11-08 09:48:05 (71.6 MB/s) - ‘book-crossings.zip’ saved [26085508/26085508]

Archive:  book-crossings.zip
  inflating: BX-Book-Ratings.csv     
  inflating: BX-Books.csv            
  inflating: BX-Users.csv            


In [3]:
# import csv data into dataframes
df_books = pd.read_csv(
    books_filename,
    encoding = "ISO-8859-1",
    sep=";",
    header=0,
    names=['isbn', 'title', 'author'],
    usecols=['isbn', 'title', 'author'],
    dtype={'isbn': 'str', 'title': 'str', 'author': 'str'})

df_ratings = pd.read_csv(
    ratings_filename,
    encoding = "ISO-8859-1",
    sep=";",
    header=0,
    names=['user', 'isbn', 'rating'],
    usecols=['user', 'isbn', 'rating'],
    dtype={'user': 'int32', 'isbn': 'str', 'rating': 'float32'})

In [6]:
df_books.sample(5)

Unnamed: 0,isbn,title,author
108292,910313172,The Magic Words (Tale from the Care Bears),Maria B. Murad
78627,1885378017,A Guide to Biltmore Estate,Rachel Carley
149305,393313670,The Arabian Nights,Husain Haddawy
166630,8423311821,Algunos Muchachos (ColecciÃ³n Destinolibro ; v...,Ana Maria Matute
69423,689822294,Heaven (Coretta Scott King Author Award Winner),Angela Johnson


In [13]:
df_ratings.sample(5)

Unnamed: 0,user,isbn,rating
253135,58076,3475531356,0.0
240690,55492,1565049837,0.0
617490,149357,743411544,0.0
276553,64803,312983395,0.0
541202,130166,786884460,0.0


In [12]:
book_counts = df_ratings['isbn'].value_counts()
book_counts

Unnamed: 0_level_0,count
isbn,Unnamed: 1_level_1
0971880107,2502
0316666343,1295
0385504209,883
0060928336,732
0312195516,723
...,...
0671883917,1
0743257502,1
0767409752,1
0785263195,1


In [7]:
df_ratings.sample(5)

Unnamed: 0,user,isbn,rating
616114,148898,515122491,0.0
164247,36299,812506898,9.0
875621,212021,345339681,8.0
203456,46374,563165529,0.0
985367,236757,345413903,0.0


In [15]:
user_counts = df_ratings['user'].value_counts()
user_counts

Unnamed: 0_level_0,count
user,Unnamed: 1_level_1
11676,13602
198711,7550
153662,6109
98391,5891
35859,5850
...,...
119573,1
276706,1
276697,1
276679,1


In [19]:
df_ratings = df_ratings[
    df_ratings['user'].isin(user_counts[user_counts >= 200].index)
]

df_ratings = df_ratings[
    df_ratings['isbn'].isin(book_counts[book_counts >= 100].index)
]

df_ratings.sample(5)

Unnamed: 0,user,isbn,rating
969137,234597,0316788228,0.0
712062,172742,0380002930,10.0
1054863,251843,0671027360,10.0
261404,60244,1400031354,0.0
732607,177090,067088300X,0.0


In [17]:
df = pd.merge(df_ratings, df_books, on = 'isbn')
df.sample(5)

Unnamed: 0,user,isbn,rating,title,author
30127,170518,080411868X,0.0,"Welcome to the World, Baby Girl!",Fannie Flagg
17263,100846,014023313X,0.0,The Stone Diaries,Carol Shields
40119,229011,0440241537,0.0,The King of Torts,JOHN GRISHAM
34418,198711,0440226430,0.0,Summer Sisters,Judy Blume
27599,155219,0446363251,0.0,Scarlett : The Sequel to Margaret Mitchell's \...,Alexandra Ripley


In [39]:
book_matrix = df.pivot_table(
    index='title',
    columns='user',
    values='rating',
    aggfunc='mean'
).fillna(0)


In [40]:
print(book_matrix.shape)


(673, 888)


In [41]:
book_sparse = csr_matrix(book_matrix.values)
book_sparse

<Compressed Sparse Row sparse matrix of dtype 'float32'
	with 12469 stored elements and shape (673, 888)>

In [42]:
from sklearn.neighbors import NearestNeighbors

model = NearestNeighbors(metric='cosine', algorithm='brute')
model.fit(book_sparse)


In [53]:
# function to return recommended books - this will be tested
def get_recommends(book = ""):
  if book not in book_matrix.index:
        return f"'{book}' not found in dataset."

  book_index = book_matrix.index.get_loc(book)
  distances, indices = model.kneighbors(book_sparse[book_index], n_neighbors=5)

  recs = []
  for i in range(1, len(distances.flatten())):
      rec_title = book_matrix.index[indices.flatten()[i]]
      rec_distance = float(distances.flatten()[i])
      recs.append([rec_title, rec_distance])

  recommended_books = [book, recs]
  return recommended_books

In [50]:
get_recommends("The Queen of the Damned (Vampire Chronicles (Paperback))")

['The Queen of the Damned (Vampire Chronicles (Paperback))',
 [['The Vampire Lestat (Vampire Chronicles, Book II)', 0.5178412199020386],
  ['The Tale of the Body Thief (Vampire Chronicles (Paperback))',
   0.5376338362693787],
  ['Interview with the Vampire', 0.7345068454742432],
  ['The Witching Hour (Lives of the Mayfair Witches)', 0.7448656558990479],
  ['Catch 22', 0.793983519077301]]]

In [54]:
books = get_recommends("Where the Heart Is (Oprah's Book Club (Paperback))")
print(books)

def test_book_recommendation():
  test_pass = True
  recommends = get_recommends("Where the Heart Is (Oprah's Book Club (Paperback))")
  if recommends[0] != "Where the Heart Is (Oprah's Book Club (Paperback))":
    test_pass = False
  recommended_books = ["I'll Be Seeing You", 'The Weight of Water', 'The Surgeon', 'I Know This Much Is True']
  recommended_books_dist = [0.8, 0.77, 0.77, 0.77]
  for i in range(2):
    if recommends[1][i][0] not in recommended_books:
      test_pass = False
    if abs(recommends[1][i][1] - recommended_books_dist[i]) >= 0.05:
      test_pass = False
  if test_pass:
    print("You passed the challenge! 🎉🎉🎉🎉🎉")
  else:
    print("You haven't passed yet. Keep trying!")

test_book_recommendation()

["Where the Heart Is (Oprah's Book Club (Paperback))", [['The Lovely Bones: A Novel', 0.7234864234924316], ['I Know This Much Is True', 0.7677075266838074], ['The Surgeon', 0.7699410915374756], ['The Weight of Water', 0.7708583474159241]]]
You haven't passed yet. Keep trying!
