In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction import DictVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

import warnings
warnings.filterwarnings("ignore")

# Read dataset
books = pd.read_csv("BX_Books.csv", delimiter=";", encoding = "ISO-8859-1", error_bad_lines=False, warn_bad_lines=False)
users = pd.read_csv("BX-Users.csv", delimiter=";", encoding = "ISO-8859-1", error_bad_lines=False, warn_bad_lines=False)
ratings = pd.read_csv("BX-Book-Ratings.csv", delimiter=";", encoding = "ISO-8859-1", error_bad_lines=False, warn_bad_lines=False)

# Show top-5 records
print(ratings.head())

   User-ID        ISBN  Book-Rating
0   276725  034545104X            0
1   276726  0155061224            5
2   276727  0446520802            0
3   276729  052165615X            3
4   276729  0521795028            6


In [2]:
# Join ratings and books dataframes
rating_books=pd.merge(ratings,books,on="ISBN")

# Shape of the data
rating_books.shape

(1031175, 10)

In [3]:
# Reducing Data Due to Memory Constraints
# Check the total number of rows in the DataFrame
total_rows = rating_books.shape[0]

# Calculate the number of rows to retain when removing 50% of the data
rows_to_remove = int(total_rows * 0.99999)

# Obtain indices of randomly selected rows to be retained
indices_to_remove = rating_books.sample(n=rows_to_remove, random_state=42).index

# Remove rows using the obtained indices
df = rating_books.drop(indices_to_remove)

In [4]:

# Create Item-user matrix using pivot_table()
rating_books_pivot = df.pivot_table(index='Book-Title', columns='User-ID', values='Book-Rating').fillna(0)

# Show top-5 records
rating_books_pivot.head()

User-ID,35028,46398,48261,56271,83109,114868,174326,204929,225731,227705,273110
Book-Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Adventures of Sherlock Holmes (Wordsworth Collection),0.0,0.0,0.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FIRST WIVES CLUB,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Firestorm (Anna Pigeon Mysteries (Paperback)),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Foucault's Pendulum,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Guilty as Sin,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [5]:

# Import NearestNeighbors
from sklearn.neighbors import NearestNeighbors

# Build NearestNeighbors Object
model_knn = NearestNeighbors(metric='cosine', algorithm='brute', n_neighbors=7, n_jobs=-1)

# Fit the NearestNeighbor
model_knn.fit(rating_books_pivot)

NearestNeighbors(algorithm='brute', metric='cosine', n_jobs=-1, n_neighbors=7)

In [6]:

# Get top 10 nearest neighbors 
indices=model_knn.kneighbors(rating_books_pivot.loc[['FIRST WIVES CLUB']], 10, return_distance=False)

# Print the recommended books
print("\nRecommended Books")
print("==================")
for index, value in enumerate(rating_books_pivot.index[indices][0]):
    print((index+1),". ",value)


Recommended Books
1 .  Portnoy et Son complexe
2 .  How to Make Your Science Project Scientific
3 .  The Promise
4 .  The Bear and the Dragon (Jack Ryan Novels)
5 .  The Apocrypha: Authorized Version of the Books Not in the Bible
6 .  Adventures of Sherlock Holmes (Wordsworth Collection)
7 .  Firestorm (Anna Pigeon Mysteries (Paperback))
8 .  Foucault's Pendulum
9 .  FIRST WIVES CLUB
10 .  Guilty as Sin


In [10]:

# Get top 10 nearest neighbors 
indices=model_knn.kneighbors(rating_books_pivot.loc[['The Promise']], 10, return_distance=False)

# Print the recommended books
print("\nRecommended Books")
print("==================")
for index, value in enumerate(rating_books_pivot.index[indices][0]):
    print((index+1),". ",value)


Recommended Books
1 .  Portnoy et Son complexe
2 .  How to Make Your Science Project Scientific
3 .  The Promise
4 .  The Bear and the Dragon (Jack Ryan Novels)
5 .  The Apocrypha: Authorized Version of the Books Not in the Bible
6 .  Adventures of Sherlock Holmes (Wordsworth Collection)
7 .  Firestorm (Anna Pigeon Mysteries (Paperback))
8 .  Foucault's Pendulum
9 .  FIRST WIVES CLUB
10 .  Guilty as Sin
