In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
# Load the book catalogue; it is read as ';'-separated, latin-1 text and any
# row pandas cannot parse is skipped instead of raising.
books = pd.read_csv(
    'BX-Books.csv',
    sep=';',
    encoding='latin-1',
    on_bad_lines='skip',
    low_memory=False,
)

In [4]:
books.columns

Index(['ISBN', 'Book-Title', 'Book-Author', 'Year-Of-Publication', 'Publisher',
       'Image-URL-S', 'Image-URL-M', 'Image-URL-L'],
      dtype='object')

In [5]:
# Keep only the bibliographic columns; the Image-URL-* columns are dropped.
books = books.loc[:, ['ISBN', 'Book-Title', 'Book-Author', 'Year-Of-Publication', 'Publisher']]

In [6]:
books.head(3)

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher
0,195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press
1,2005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada
2,60973129,Decision in Normandy,Carlo D'Este,1991,HarperPerennial


In [7]:
# Shorten the column names. "ISBN" becomes "Book-ID", which is the key the
# ratings table is merged on further down.
books = books.rename(
    columns={
        "ISBN": "Book-ID",
        "Book-Title": "Title",
        "Book-Author": "Author",
        "Year-Of-Publication": "Year",
    }
)

In [8]:
books.head(3)

Unnamed: 0,Book-ID,Title,Author,Year,Publisher
0,195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press
1,2005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada
2,60973129,Decision in Normandy,Carlo D'Este,1991,HarperPerennial


In [9]:
# Load the user table with the same parsing options as the other BX-* files.
users = pd.read_csv(
    'BX-Users.csv',
    sep=';',
    encoding='latin-1',
    on_bad_lines='skip',
    low_memory=False,
)

In [10]:
users.shape

(278858, 3)

In [11]:
# Load the (user, book, rating) triples; unparseable rows are skipped.
ratings = pd.read_csv(
    'BX-Book-Ratings.csv',
    sep=';',
    encoding='latin-1',
    on_bad_lines='skip',
    low_memory=False,
)

In [12]:
ratings.head(5)

Unnamed: 0,User-ID,ISBN,Book-Rating
0,276725,034545104X,0
1,276726,0155061224,5
2,276727,0446520802,0
3,276729,052165615X,3
4,276729,0521795028,6


In [13]:
# Align column names with the books table so both share the "Book-ID" key.
ratings = ratings.rename(
    columns={
        "ISBN": "Book-ID",
        "Book-Rating": "Rating",
    }
)


In [14]:
ratings.shape

(1149780, 3)

In [15]:
ratings['User-ID'].value_counts()

User-ID
11676     13602
198711     7550
153662     6109
98391      5891
35859      5850
          ...  
69281         1
69239         1
69241         1
69245         1
276733        1
Name: count, Length: 105283, dtype: int64

In [16]:
# Boolean mask indexed by User-ID: True where the user has more than 200 ratings.
x = ratings["User-ID"].value_counts().gt(200)
# Preview only the users that pass the threshold.
x.loc[x].head(5)

User-ID
11676     True
198711    True
153662    True
98391     True
35859     True
Name: count, dtype: bool

In [17]:
# User-IDs whose entry in the mask `x` is True.
y = x.index[x.to_numpy()]

In [None]:
# Keep only the rating rows written by the users selected in `y`.
ratings = ratings.loc[ratings['User-ID'].isin(y)]
ratings.head(3)

Unnamed: 0,User-ID,Book-ID,Rating
1456,277427,002542730X,10
1457,277427,0026217457,0
1458,277427,003008685X,8


In [19]:
ratings.head(3)

Unnamed: 0,User-ID,Book-ID,Rating
1456,277427,002542730X,10
1457,277427,0026217457,0
1458,277427,003008685X,8


In [20]:
books.head(3)

Unnamed: 0,Book-ID,Title,Author,Year,Publisher
0,195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press
1,2005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada
2,60973129,Decision in Normandy,Carlo D'Este,1991,HarperPerennial


In [21]:
# Attach book metadata to each rating (default inner join on "Book-ID", so
# ratings whose Book-ID is absent from `books` are dropped).
ratings_with_book = pd.merge(ratings, books, on="Book-ID")

In [22]:
ratings_with_book.head(3)

Unnamed: 0,User-ID,Book-ID,Rating,Title,Author,Year,Publisher
0,277427,002542730X,10,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner,1994,John Wiley &amp; Sons Inc
1,277427,0026217457,0,Vegetarian Times Complete Cookbook,Lucy Moll,1995,John Wiley &amp; Sons
2,277427,003008685X,8,Pioneers,James Fenimore Cooper,1974,Thomson Learning


In [23]:
ratings_with_book.shape

(487671, 7)

In [24]:
# Number of (non-null) rating rows per title, as a two-column frame.
no_of_ratings = (
    ratings_with_book
    .groupby('Title')['Rating']
    .agg('count')
    .reset_index()
)

In [25]:
# The count column must not collide with "Rating" when merged back later.
no_of_ratings = no_of_ratings.rename(columns={"Rating": "Num_of_ratings"})

In [26]:
no_of_ratings.head(3)

Unnamed: 0,Title,Num_of_ratings
0,A Light in the Storm: The Civil War Diary of ...,2
1,Always Have Popsicles,1
2,Apple Magic (The Collector's series),1


In [27]:
# Annotate every rating row with how many ratings its title received.
rating_table = pd.merge(ratings_with_book, no_of_ratings, on="Title")
rating_table.head(3)

Unnamed: 0,User-ID,Book-ID,Rating,Title,Author,Year,Publisher,Num_of_ratings
0,277427,002542730X,10,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner,1994,John Wiley &amp; Sons Inc,82
1,277427,0026217457,0,Vegetarian Times Complete Cookbook,Lucy Moll,1995,John Wiley &amp; Sons,7
2,277427,003008685X,8,Pioneers,James Fenimore Cooper,1974,Thomson Learning,1


In [28]:
# Keep only titles with at least 50 ratings.
rating_table = rating_table.loc[rating_table['Num_of_ratings'].ge(50)]
rating_table.head(3)

Unnamed: 0,User-ID,Book-ID,Rating,Title,Author,Year,Publisher,Num_of_ratings
0,277427,002542730X,10,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner,1994,John Wiley &amp; Sons Inc,82
13,277427,0060930535,0,The Poisonwood Bible: A Novel,Barbara Kingsolver,1999,Perennial,133
15,277427,0060934417,0,Bel Canto: A Novel,Ann Patchett,2002,Perennial,108


In [29]:
rating_table.shape

(61853, 8)

In [30]:
# Title x User-ID matrix of ratings; cells without a rating are NaN at this point.
rating_matrix = pd.pivot_table(
    rating_table,
    values='Rating',
    index='Title',
    columns="User-ID",
)

In [31]:
rating_matrix.head(3)

User-ID,254,2276,2766,2977,3363,3757,4017,4385,6242,6251,...,274004,274061,274301,274308,274808,275970,277427,277478,277639,278418
Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1984,9.0,,,,,,,,,,...,,,,,,0.0,,,,
1st to Die: A Novel,,,,,,,,,,,...,,,,,,,,,,
2nd Chance,,10.0,,,,,,,,,...,,,,0.0,,,,,0.0,


In [32]:
# Replace missing (title, user) cells with 0 so the matrix is fully numeric.
rating_matrix = rating_matrix.fillna(0)
rating_matrix.head(3)

User-ID,254,2276,2766,2977,3363,3757,4017,4385,6242,6251,...,274004,274061,274301,274308,274808,275970,277427,277478,277639,278418
Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1984,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1st to Die: A Novel,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2nd Chance,0.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [33]:
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors

In [34]:
# Compressed-sparse-row copy of the title x user matrix (one row per title).
rm_sparse = csr_matrix(rating_matrix.to_numpy())

In [35]:
# Brute-force nearest-neighbour index over the title rows; fit() returns the
# estimator itself, so construction and fitting can be chained.
model = NearestNeighbors(algorithm='brute').fit(rm_sparse)
model

In [36]:
# Query the 5 nearest titles to the row at position 5; iloc[[5]] keeps the
# row two-dimensional (1 x n_users), which is the shape kneighbors receives.
dist, rmnds = model.kneighbors(
    rating_matrix.iloc[[5]].to_numpy(),
    n_neighbors=5,
)

In [37]:
dist

array([[ 0.        , 38.34383914, 38.92621225, 39.16950344, 39.30648801]])

In [38]:
rmnds

array([[  5, 184, 536, 372, 311]])

In [39]:
rating_matrix.iloc[5,:]

User-ID
254       0.0
2276      0.0
2766      7.0
2977      0.0
3363      0.0
         ... 
275970    0.0
277427    0.0
277478    0.0
277639    0.0
278418    0.0
Name: A Bend in the Road, Length: 888, dtype: float64

In [40]:
# Translate each neighbour's row position back into its title.
for neighbour_idx in rmnds[0]:
    print(rating_matrix.index[neighbour_idx])

A Bend in the Road
Exclusive
The Cradle Will Fall
No Safe Place
Last Man Standing


In [41]:
print(len(rmnds[0]))

5


In [42]:
# Title labels of the rating matrix rows. Positions in this Index are the row
# positions that model.kneighbors() returns, so it maps neighbour indices to titles.
book_name = rating_matrix.index

In [43]:
book_name[5]

'A Bend in the Road'

In [48]:
np.where(rating_matrix.index == "A Bend in the Road")[0][0]

np.int64(5)

In [45]:
import pickle
from pathlib import Path

# Persist the fitted model and its supporting tables for later reuse.
out_dir = Path('loaded_files')
# open(..., 'wb') does not create missing directories, so make sure it exists.
out_dir.mkdir(parents=True, exist_ok=True)

artifacts = {
    'model.pkl': model,
    # Dump the title Index straight from the matrix rather than via the
    # `book_name` variable: a later cell rebinds `book_name` to a single title
    # string, so re-running this cell afterwards would pickle the wrong object.
    'book_name.pkl': rating_matrix.index,
    'rating_table.pkl': rating_table,
    'rating_matrix.pkl': rating_matrix,
}

for filename, obj in artifacts.items():
    # The context manager guarantees each file is flushed and closed.
    with open(out_dir / filename, 'wb') as fh:
        pickle.dump(obj, fh)

In [46]:
def recommend(book_name, n_neighbors=5):
    """Print the titles most similar to ``book_name``.

    Looks the title up in the module-level ``rating_matrix`` (rows are titles),
    asks the module-level ``model`` for the nearest rows, and prints a header
    followed by every neighbouring title other than the queried one.

    Parameters
    ----------
    book_name : str
        Exact title, as it appears in ``rating_matrix.index``.
    n_neighbors : int, default 5
        Number of neighbours requested from the model. The queried book is
        normally among them, so up to ``n_neighbors - 1`` titles are printed.

    Raises
    ------
    IndexError
        If ``book_name`` is not a row label of ``rating_matrix``.
    """
    matches = np.where(rating_matrix.index == book_name)[0]
    if matches.size == 0:
        # Same exception type as the bare [0][0] lookup, but with a clear message.
        raise IndexError(f"Book title not found in rating_matrix: {book_name!r}")
    book_id = matches[0]

    # kneighbors rejects requests for more neighbours than fitted rows.
    k = min(n_neighbors, len(rating_matrix))
    query = rating_matrix.iloc[book_id, :].values.reshape(1, -1)
    _, rcmnds = model.kneighbors(query, n_neighbors=k)

    # Print the header up front: the queried book is not guaranteed to be the
    # first neighbour returned (e.g. ties at distance 0), and printing it from
    # inside the loop could place it after some recommendations or skip it.
    print("The book which you recently read is "+book_name+"\n")
    print("The recommended books for you \n")

    for idx in rcmnds[0]:
        book = rating_matrix.index[idx]
        if book != book_name:
            print(book)

In [47]:
# Use a dedicated variable for the query title. `book_name` already holds the
# Index of all titles (and is what gets written to book_name.pkl), so rebinding
# it to a string here would make a re-run of the pickle cell dump the wrong object.
recently_read = "Harry Potter and the Chamber of Secrets (Book 2)"
recommend(recently_read)

The book which you recently read is Harry Potter and the Chamber of Secrets (Book 2)

The recommended books for you 

Harry Potter and the Prisoner of Azkaban (Book 3)
Harry Potter and the Goblet of Fire (Book 4)
Harry Potter and the Sorcerer's Stone (Book 1)
Exclusive
