In [1]:
# Import needed modules
import numpy as np
import pandas as pd
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Read data
book_data = pd.read_csv('book.csv')
book_data.head()

Unnamed: 0,isbn13,isbn10,title,subtitle,authors,categories,thumbnail,description,published_year,average_rating,num_pages,ratings_count
0,9780002005883,2005883,Gilead,,Marilynne Robinson,Fiction,http://books.google.com/books/content?id=KQZCP...,A NOVEL THAT READERS and critics have been eag...,2004.0,3.85,247.0,361.0
1,9780002261982,2261987,Spider's Web,A Novel,Charles Osborne;Agatha Christie,Detective and mystery stories,http://books.google.com/books/content?id=gA5GP...,A new 'Christie for Christmas' -- a full-lengt...,2000.0,3.83,241.0,5164.0
2,9780006163831,6163831,The One Tree,,Stephen R. Donaldson,American fiction,http://books.google.com/books/content?id=OmQaw...,Volume Two of Stephen Donaldson's acclaimed se...,1982.0,3.97,479.0,172.0
3,9780006178736,6178731,Rage of angels,,Sidney Sheldon,Fiction,http://books.google.com/books/content?id=FKo2T...,"A memorable, mesmerizing heroine Jennifer -- b...",1993.0,3.93,512.0,29532.0
4,9780006280897,6280897,The Four Loves,,Clive Staples Lewis,Christian life,http://books.google.com/books/content?id=XhQ5X...,Lewis' work on the nature of love divides love...,2002.0,4.15,170.0,33684.0


In [3]:
# Get data information
book_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6810 entries, 0 to 6809
Data columns (total 12 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   isbn13          6810 non-null   int64  
 1   isbn10          6810 non-null   object 
 2   title           6810 non-null   object 
 3   subtitle        2381 non-null   object 
 4   authors         6738 non-null   object 
 5   categories      6711 non-null   object 
 6   thumbnail       6481 non-null   object 
 7   description     6548 non-null   object 
 8   published_year  6804 non-null   float64
 9   average_rating  6767 non-null   float64
 10  num_pages       6767 non-null   float64
 11  ratings_count   6767 non-null   float64
dtypes: float64(4), int64(1), object(7)
memory usage: 638.6+ KB


In [4]:
# Selecting the relevant features for recommendation
selected_features = ['title','authors','categories','published_year']
# Replacing the null valuess with null string
for feature in selected_features:
    book_data[feature] = book_data[feature].fillna('')

In [5]:
# combining all the 4 selected features
combined_features = book_data['title'] + ' ' + book_data['categories'] + ' ' + book_data['authors'] + ' ' + f"{book_data['published_year']}"
combined_features

0       Gilead Fiction Marilynne Robinson 0       2004...
1       Spider's Web Detective and mystery stories Cha...
2       The One Tree American fiction Stephen R. Donal...
3       Rage of angels Fiction Sidney Sheldon 0       ...
4       The Four Loves Christian life Clive Staples Le...
                              ...                        
6805    I Am that Philosophy Sri Nisargadatta Maharaj;...
6806    Secrets Of The Heart Mysticism Khalil Gibran 0...
6807    Fahrenheit 451 Book burning Ray Bradbury 0    ...
6808    The Berlin Phenomenology History Georg Wilhelm...
6809    'I'm Telling You Stories' Literary Criticism H...
Length: 6810, dtype: object

In [6]:
# converting the text data to feature vectors
vectorizer = TfidfVectorizer()
feature_vectors = vectorizer.fit_transform(combined_features)
print(feature_vectors)

  (0, 6894)	0.06443750710614472
  (0, 2840)	0.06443750710614472
  (0, 143)	0.06443750710614472
  (0, 5557)	0.06443750710614472
  (0, 7629)	0.06443750710614472
  (0, 6655)	0.06443750710614472
  (0, 102)	0.06443750710614472
  (0, 142)	0.06443750710614472
  (0, 91)	0.06443750710614472
  (0, 141)	0.06443750710614472
  (0, 140)	0.06443750710614472
  (0, 139)	0.06443750710614472
  (0, 103)	0.06443750710614472
  (0, 138)	0.06443750710614472
  (0, 110)	0.06443750710614472
  (0, 98)	0.12887501421228945
  (0, 92)	0.06443750710614472
  (0, 108)	0.06443750710614472
  (0, 112)	0.12887501421228945
  (0, 8035)	0.43700046813706117
  (0, 6013)	0.5885172279797457
  (0, 3422)	0.11189605685054443
  (0, 3882)	0.5885172279797457
  (1, 1822)	0.3013956698681646
  (1, 267)	0.30498953565603815
  :	:
  (6809, 10330)	0.33175135989257093
  (6809, 2240)	0.2206043839379868
  (6809, 9492)	0.30218650531542857
  (6809, 10435)	0.26439634843355025
  (6809, 5684)	0.20438577371569946
  (6809, 9065)	0.21319651041165227
  (6

In [7]:
# getting the similarity scores using cosine similarity
similarity = cosine_similarity(feature_vectors, feature_vectors)
print(similarity)

[[1.         0.08005311 0.11641123 ... 0.08492469 0.08320896 0.07576596]
 [0.08005311 1.         0.08011208 ... 0.06549297 0.06416982 0.10645452]
 [0.11641123 0.08011208 1.         ... 0.08498725 0.09570742 0.07582177]
 ...
 [0.08492469 0.06549297 0.08498725 ... 1.         0.06807484 0.06198557]
 [0.08320896 0.06416982 0.09570742 ... 0.06807484 1.         0.06073329]
 [0.07576596 0.10645452 0.07582177 ... 0.06198557 0.06073329 1.        ]]


In [8]:
# creating a list with all the book names given in the dataset
list_of_all_titles = book_data['title'].tolist()
print(list_of_all_titles)



In [9]:
# getting the book name from the user
book_name = input(' Enter your favourite book name : ') # input: Rage of angels
# finding the close match for the book name given by the user
find_close_match = difflib.get_close_matches(book_name, list_of_all_titles)
print(find_close_match)

 Enter your favourite book name :  How the Mind Works


['How the Mind Works', 'Hear the Wind Blow', 'The Major Works']


In [10]:
# finding the index of the book with title
close_match = find_close_match[0]
index_of_the_book = book_data[book_data.title == close_match].index[0]

In [11]:
# getting a list of similar books
similarity_score = list(enumerate(similarity[index_of_the_book]))
print(similarity_score)

[(0, 0.09599722791861831), (1, 0.07403199039451844), (2, 0.11041655167436247), (3, 0.08923391819920193), (4, 0.09106791923142246), (5, 0.08470468463776365), (6, 0.09844928315781101), (7, 0.08515395482949488), (8, 0.07277652154669763), (9, 0.09598168202860662), (10, 0.07509999367470917), (11, 0.0848396203221019), (12, 0.08307265342340894), (13, 0.07552160211838825), (14, 0.06907679166687637), (15, 0.09300522330448799), (16, 0.08340265232706001), (17, 0.08189139286649534), (18, 0.10398433403766447), (19, 0.08299095032912424), (20, 0.0823621839909345), (21, 0.07907077689707226), (22, 0.0959362950631626), (23, 0.07020958901889253), (24, 0.10417028521570881), (25, 0.07721457376157063), (26, 0.08356490288708349), (27, 0.07573080145500474), (28, 0.09837639261857413), (29, 0.09404899921533585), (30, 0.06769304098444302), (31, 0.09201148599941597), (32, 0.09345021190351235), (33, 0.0933447034417628), (34, 0.08778168170224672), (35, 0.07164977182718034), (36, 0.07070159579145066), (37, 0.0847215

In [12]:
# sorting the books based on their similarity score
sorted_similar_books = sorted(similarity_score, key = lambda x:x[1], reverse = True) 
print(sorted_similar_books)

[(712, 1.0), (975, 0.47134515085328843), (376, 0.41775828851332286), (1243, 0.35838984154129977), (5438, 0.34901352523483736), (532, 0.32089310912426267), (4219, 0.31598739697810857), (3056, 0.3089682079744547), (4220, 0.30193085557975974), (5653, 0.29678787759174224), (2144, 0.2872088203714093), (5210, 0.2870451686260101), (1178, 0.28357139163588885), (6423, 0.2803047388603221), (1263, 0.27793107920650056), (3479, 0.2741266616145122), (3305, 0.272349503076936), (5341, 0.27086529056760666), (2368, 0.26698969549135093), (2911, 0.2633565355528812), (3474, 0.26333773712230946), (3484, 0.2623775143873142), (535, 0.2586016126462647), (4490, 0.2549876044397041), (3460, 0.2497977578892287), (3788, 0.24661806488554422), (188, 0.24453256861534967), (5441, 0.2427817622231843), (4546, 0.24227182856161567), (4714, 0.24113291383305738), (1267, 0.2407472449482681), (4693, 0.23975870888108847), (2302, 0.23958137108054015), (2304, 0.23958137108054015), (2305, 0.23958137108054015), (5536, 0.23739146823

In [13]:
top_sim = sorted_similar_books[:5]
top_sim

[(712, 1.0),
 (975, 0.47134515085328843),
 (376, 0.41775828851332286),
 (1243, 0.35838984154129977),
 (5438, 0.34901352523483736)]

In [14]:
# print the name of similar books based on the index
i = 1
for book in sorted_similar_books:
    index = book[0]
    title_from_index = book_data[book_data.index==index]['title'].values[0]
    if (i < 6):
        print(i, '-', title_from_index)
        i += 1

1 - How the Mind Works
2 - The Blank Slate
3 - The Language Instinct
4 - Philosophy of Mind
5 - Complete Works


In [17]:
book_name = input(' Enter your favourite book: ')
list_of_all_titles = book_data['title'].tolist()
find_close_match = difflib.get_close_matches(book_name, list_of_all_titles)
close_match = find_close_match[0]
index_of_the_book = book_data[book_data.title == close_match].index[0]
similarity_score = list(enumerate(similarity[index_of_the_book]))
sorted_similar_books = sorted(similarity_score, key = lambda x:x[1], reverse = True) 
print('Recommended Books: \n')

i = 1
for book in sorted_similar_books:
    index = book[0]
    title_from_index = book_data[book_data.index==index]['title'].values[0]
    if (i <= 30):
        print(i, '.',title_from_index)
        i+=1

 Enter your favourite book:  How the Mind Works


Recommended Books: 

1 . How the Mind Works
2 . The Blank Slate
3 . The Language Instinct
4 . Philosophy of Mind
5 . Complete Works
6 . The Complete Works
7 . The complete works of Aristotle
8 . Ten Great Works of Philosophy
9 . The Complete Works of Aristotle
10 . The Mind Parasites
11 . The Basic Works of Aristotle
12 . The Beatles and Philosophy
13 . The Major Works
14 . The Ghost Map
15 . The Major Works
16 . Last of the Amazons
17 . Spinoza
18 . Beyond the Postmodern Mind
19 . Redemption
20 . The War of Art
21 . Tides of War
22 . Gates Of Fire
23 . How to Have a Beautiful Mind
24 . Political Philosophy
25 . The Mind's I
26 . Western Philosophy
27 . The Perennial Philosophy
28 . The Peloponnesian War
29 . The Story of Philosophy
30 . Children of the Mind
