# Book Recommender System of Tech Books

In [1]:
import numpy as np
import pandas as pd
import warnings

import matplotlib.pyplot as plt

In [2]:
from sklearn.decomposition import PCA
warnings.filterwarnings('ignore', category=RuntimeWarning)

In [3]:
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors

In [4]:
book_reviews = pd.read_csv('data/tech_books.csv',
                            encoding='utf-8')
book_reviews.drop('Location', axis=1, inplace=True)
pd.options.display.float_format = '{:,.2f}'.format

## Sample of dataset

In [5]:
book_reviews.head()

Unnamed: 0,UserID,ISBN,Rating,Title,TotalRatings
0,276822,805057706,10,The Number Devil: A Mathematical Adventure,4
1,27838,805057706,9,The Number Devil: A Mathematical Adventure,4
2,276859,789706032,8,The Complete Idiot's Guide to the Microsoft Ne...,1
3,277427,811811409,10,The Venetian's Wife: A Strangely Sensual Tale ...,37
4,11676,811811409,10,The Venetian's Wife: A Strangely Sensual Tale ...,37


## Top 5 Total Ratings per distinct books

In [6]:
top_total_ratings = book_reviews.nlargest(250, columns=['TotalRatings'])['Title'].unique()
book_reviews[book_reviews['Title'].isin(top_total_ratings)].drop_duplicates('Title')[['UserID', 'Title', 'TotalRatings']].reset_index(drop=True)

Unnamed: 0,UserID,Title,TotalRatings
0,277427,The Venetian's Wife: A Strangely Sensual Tale ...,37
1,278314,A Beautiful Mind: The Life of Mathematical Gen...,136
2,2358,Ringworld Engineers,37
3,6789,To Engineer Is Human: The Role of Failure in S...,13
4,16413,Innumeracy: Mathematical Illiteracy and Its Co...,13
5,20172,The Code Book: The Science of Secrecy from Anc...,11
6,71102,Fermat's Enigma: The Epic Quest to Solve the W...,11


In [25]:
%matplotlib notebook

plt.rcParams['figure.dpi'] = 65
book_reviews.TotalRatings.value_counts().plot(kind='bar')
plt.rc('font', size=12)
plt.title('Total Ratings Distribution')
plt.xlabel('Total Ratings')
plt.ylabel('Count')
plt.show()

<IPython.core.display.Javascript object>

## Top 5 books per Ratings

In [8]:
book_reviews.nlargest(20, columns=['Rating']).drop_duplicates('Title')[['UserID', 'Title', 'Rating']].reset_index(drop=True)

Unnamed: 0,UserID,Title,Rating
0,276822,The Number Devil: A Mathematical Adventure,10
1,277427,The Venetian's Wife: A Strangely Sensual Tale ...,10
2,36606,A Beautiful Mind: The Life of Mathematical Gen...,10
3,228595,Even Steven and Odd Todd (Hello Math Reader. L...,10
4,55548,Zen &amp; the Art of the Macintosh : Discoveri...,10


In [9]:
%matplotlib notebook

plt.rcParams['figure.dpi'] = 90
book_reviews.Rating.value_counts().plot(kind='bar')
plt.rc('font', size=8)
plt.title('Ratings Distribution')
plt.xlabel('Ratings')
plt.ylabel('Count')
plt.show()

<IPython.core.display.Javascript object>

## Descriptive statistics of Total Ratings

In [10]:
book_reviews['TotalRatings'].describe()

count   2,443.00
mean       10.90
std        31.03
min         1.00
25%         1.00
50%         1.00
75%         4.00
max       136.00
Name: TotalRatings, dtype: float64

In [11]:
book_title = "Rapid Development: Taming Wild Software Schedules"

In [12]:
books_rating_pivot = book_reviews.pivot(index='Title', columns='UserID', values='Rating').fillna(0)
books_rating_matrix = csr_matrix(books_rating_pivot.values)

book_list = list(books_rating_pivot.index)
finded_book = next((res for res in book_list if book_title in res), None)

In [13]:
model_knn = NearestNeighbors(metric='cosine', algorithm='brute')
model_knn.fit(books_rating_matrix)

NearestNeighbors(algorithm='brute', leaf_size=30, metric='cosine',
         metric_params=None, n_jobs=None, n_neighbors=5, p=2, radius=1.0)

In [14]:
book_index = books_rating_pivot.index == finded_book
x = books_rating_pivot.iloc[book_index, :].values.reshape(1, -1)
distances, idx = model_knn.kneighbors(x, n_neighbors=5)

## Results of Collaborative Filtering Using K-Nearest Neighbors

In [29]:
print('Recommendations for "\033[1m{0}"\033[0m book:\n'.format(books_rating_pivot[book_index].index[0].format()))

for i in range(len(distances.flatten())):
    print('\033[1m"{0}"\033[0m, distance {1:.2f}'.format(books_rating_pivot.index[idx.flatten()[i]],distances.flatten()[i]))

Recommendations for "[1mRapid Development: Taming Wild Software Schedules"[0m book:

[1m"Software Engineering: A Practitioner's Approach"[0m, distance 0.00
[1m"E-Business: Roadmap for Success (Addison-Wesley Information Technology Series)"[0m, distance 0.00
[1m"Rapid Development: Taming Wild Software Schedules"[0m, distance 0.00
[1m"The New Direct Marketing: How to Implement a Profit-Driven Database Marketing Strategy"[0m, distance 0.00
[1m"MCSE Windows NT Server 4.0 Study Guide (Exam 70-67)"[0m, distance 0.00


In [16]:
pca = PCA(n_components=8, random_state=42)
pca_matrix = pca.fit_transform(books_rating_pivot)
corr = np.corrcoef(pca_matrix)

## Results of Collaborative Filtering Using Matrix Factorization 

In [17]:
print('Recommendations for "\033[1m{0}"\033[0m book:\n'.format(finded_book))

if finded_book is not None:
    corr_result = corr[book_list.index(finded_book)]
    corr_result = corr_result[~np.isnan(corr_result)]
    max_indices = np.argsort(corr_result)[-10:]
    books_title = books_rating_pivot.index
    top_result = books_title[max_indices]
    
    for result, score in reversed(list(zip(top_result, corr_result[max_indices]))):
        print('\033[1m"{0}"\033[0m, distance {1:.2f}'.format(result, score))
else:
    print("There are not recommended books!!!")

Recommendations for "[1mRapid Development: Taming Wild Software Schedules"[0m book:

[1m"The Business of Ecommerce : From Corporate Strategy to Technology (Breakthroughs in Application Development)"[0m, distance 1.00
[1m"Information Rules: A Strategic Guide to the Network Economy"[0m, distance 1.00
[1m"Rapid Development: Taming Wild Software Schedules"[0m, distance 1.00
[1m"E-Business: Roadmap for Success (Addison-Wesley Information Technology Series)"[0m, distance 1.00
[1m"Practical UNIX Security (Computer security)"[0m, distance 1.00
[1m"The New Direct Marketing: How to Implement a Profit-Driven Database Marketing Strategy"[0m, distance 1.00
[1m"Software Engineering: A Practitioner's Approach"[0m, distance 1.00
[1m"Real Estate Law (Prindle, Weber, and Schmidt Series in Mathematics)"[0m, distance 1.00
[1m"MCSE Windows NT Server 4.0 in the Enterprise Study Guide (Exam 70-68)"[0m, distance 1.00
[1m"MCSE Windows NT Server 4.0 Study Guide (Exam 70-67)"[0m, distance 1