In [1]:
# import libraries (you may add additional imports but you may not have to)
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
import matplotlib.pyplot as plt

In [2]:
# get data files
!wget https://cdn.freecodecamp.org/project-data/books/book-crossings.zip

!unzip book-crossings.zip

books_filename = 'BX-Books.csv'
ratings_filename = 'BX-Book-Ratings.csv'

--2022-07-16 15:45:26--  https://cdn.freecodecamp.org/project-data/books/book-crossings.zip
Resolving cdn.freecodecamp.org (cdn.freecodecamp.org)... 104.26.3.33, 104.26.2.33, 172.67.70.149, ...
Connecting to cdn.freecodecamp.org (cdn.freecodecamp.org)|104.26.3.33|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 26085508 (25M) [application/zip]
Saving to: ‘book-crossings.zip’


2022-07-16 15:45:32 (3.86 MB/s) - ‘book-crossings.zip’ saved [26085508/26085508]

Archive:  book-crossings.zip
  inflating: BX-Book-Ratings.csv     
  inflating: BX-Books.csv            
  inflating: BX-Users.csv            


In [3]:
# import csv data into dataframes
df_books = pd.read_csv(
    books_filename,
    encoding = "ISO-8859-1",
    sep=";",
    header=0,
    names=['isbn', 'title', 'author'],
    usecols=['isbn', 'title', 'author'],
    dtype={'isbn': 'str', 'title': 'str', 'author': 'str'})

df_ratings = pd.read_csv(
    ratings_filename,
    encoding = "ISO-8859-1",
    sep=";",
    header=0,
    names=['user', 'isbn', 'rating'],
    usecols=['user', 'isbn', 'rating'],
    dtype={'user': 'int32', 'isbn': 'str', 'rating': 'float32'})

In [4]:
df_books.head(5)

Unnamed: 0,isbn,title,author
0,195153448,Classical Mythology,Mark P. O. Morford
1,2005018,Clara Callan,Richard Bruce Wright
2,60973129,Decision in Normandy,Carlo D'Este
3,374157065,Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata
4,393045218,The Mummies of Urumchi,E. J. W. Barber


In [5]:
df_books.dropna(inplace=True)

In [6]:
df_books.shape

(271378, 3)

In [7]:
df_ratings.head(5)

Unnamed: 0,user,isbn,rating
0,276725,034545104X,0.0
1,276726,0155061224,5.0
2,276727,0446520802,0.0
3,276729,052165615X,3.0
4,276729,0521795028,6.0


In [8]:
df_ratings.dropna(inplace=True)

In [14]:
ratings = df_ratings['user'].value_counts()
df_ratings_new = df_ratings[~df_ratings['user'].isin(ratings[ratings < 200].index)]

In [15]:
df_ratings_new

Unnamed: 0,user,isbn,rating
1456,277427,002542730X,10.0
1457,277427,0026217457,0.0
1458,277427,003008685X,8.0
1459,277427,0030615321,0.0
1460,277427,0060002050,0.0
...,...,...,...
1147612,275970,3829021860,0.0
1147613,275970,4770019572,0.0
1147614,275970,896086097,0.0
1147615,275970,9626340762,8.0


In [16]:
ratings = df_ratings['isbn'].value_counts()
df_ratings_new = df_ratings_new[~df_ratings['isbn'].isin(ratings[ratings < 100].index)]

  


In [17]:
df_ratings_new.shape

(49781, 3)

In [18]:
df_ratings_new

Unnamed: 0,user,isbn,rating
1456,277427,002542730X,10.0
1469,277427,0060930535,0.0
1471,277427,0060934417,0.0
1474,277427,0061009059,9.0
1484,277427,0140067477,0.0
...,...,...,...
1147304,275970,0804111359,0.0
1147436,275970,140003065X,0.0
1147439,275970,1400031346,0.0
1147440,275970,1400031354,0.0


In [34]:
df = pd.merge(df_ratings_new, df_books, on = 'isbn')

In [35]:
df1 = df.pivot_table(index = 'title',
                    columns = 'user',
                     values = 'rating',
                     fill_value = 0)

In [36]:
df1

user,254,2276,2766,2977,3363,4017,4385,6242,6251,6323,...,274004,274061,274301,274308,274808,275970,277427,277478,277639,278418
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1984,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1st to Die: A Novel,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2nd Chance,0,10,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4 Blondes,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
A Beautiful Mind: The Life of Mathematical Genius and Nobel Laureate John Nash,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Without Remorse,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Year of Wonders,0,0,0,7,0,0,0,7,0,0,...,0,0,0,0,0,0,0,0,0,0
You Belong To Me,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Zen and the Art of Motorcycle Maintenance: An Inquiry into Values,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [22]:
csr_mat = csr_matrix(df1.values)

In [37]:
classifier = NearestNeighbors(metric = 'cosine')
classifier.fit(df1)

NearestNeighbors(metric='cosine')

In [39]:
d, i = classifier.kneighbors([df1.loc[name].values], n_neighbors=6)

In [40]:
print(d)
print(i)

[[2.22044605e-16 5.17841186e-01 5.37633845e-01 7.34506886e-01
  7.44865700e-01 7.93983542e-01]]
[[567 610 599 251 617 100]]


In [45]:
df1.iloc[i[0]].index.values

array(['The Queen of the Damned (Vampire Chronicles (Paperback))',
       'The Vampire Lestat (Vampire Chronicles, Book II)',
       'The Tale of the Body Thief (Vampire Chronicles (Paperback))',
       'Interview with the Vampire',
       'The Witching Hour (Lives of the Mayfair Witches)', 'Catch 22'],
      dtype=object)

In [46]:
pd.DataFrame({
    'title' : i[0],
    'distance' : d[0] 
}).sort_values(by = 'distance', ascending = False)

Unnamed: 0,title,distance
5,100,0.7939835
4,617,0.7448657
3,251,0.7345069
2,599,0.5376338
1,610,0.5178412
0,567,2.220446e-16


In [71]:
def get_recommends(book = ""):
    try:
        df1.loc[book]
    except:
        print("This book does not exist in our record.")

    d, i = classifier.kneighbors([df1.loc[book].values], n_neighbors=6)
    
    recommendations = pd.DataFrame({
        'title' : df1.iloc[i[0]].index.values, 
        'distance' : d[0]
        }).sort_values(by = 'distance', 
                       ascending = False).head().values
    
    recommended_books = [book, recommendations]

    return recommended_books