In [None]:
import numpy as np
import pandas as pd
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
df = pd.read_csv('/content/data.csv')

In [None]:
df.head()

Unnamed: 0,isbn13,isbn10,title,subtitle,authors,categories,thumbnail,description,published_year,average_rating,num_pages,ratings_count
0,9780002005883,2005883,Gilead,,Marilynne Robinson,Fiction,http://books.google.com/books/content?id=KQZCP...,A NOVEL THAT READERS and critics have been eag...,2004.0,3.85,247.0,361.0
1,9780002261982,2261987,Spider's Web,A Novel,Charles Osborne;Agatha Christie,Detective and mystery stories,http://books.google.com/books/content?id=gA5GP...,A new 'Christie for Christmas' -- a full-lengt...,2000.0,3.83,241.0,5164.0
2,9780006163831,6163831,The One Tree,,Stephen R. Donaldson,American fiction,http://books.google.com/books/content?id=OmQaw...,Volume Two of Stephen Donaldson's acclaimed se...,1982.0,3.97,479.0,172.0
3,9780006178736,6178731,Rage of angels,,Sidney Sheldon,Fiction,http://books.google.com/books/content?id=FKo2T...,"A memorable, mesmerizing heroine Jennifer -- b...",1993.0,3.93,512.0,29532.0
4,9780006280897,6280897,The Four Loves,,Clive Staples Lewis,Christian life,http://books.google.com/books/content?id=XhQ5X...,Lewis' work on the nature of love divides love...,2002.0,4.15,170.0,33684.0


In [None]:
selected_features = ['title','authors','categories','published_year']
print(selected_features)

['title', 'authors', 'categories', 'published_year']


In [None]:
for feature in selected_features:
    df[feature] = df[feature].fillna('')

In [None]:
combined_features = df['title'] + ' ' + df['categories'] + ' ' + df['authors'] + ' ' + f"{df['published_year']}"
combined_features

0       Gilead Fiction Marilynne Robinson 0       2004...
1       Spider's Web Detective and mystery stories Cha...
2       The One Tree American fiction Stephen R. Donal...
3       Rage of angels Fiction Sidney Sheldon 0       ...
4       The Four Loves Christian life Clive Staples Le...
                              ...                        
6805    I Am that Philosophy Sri Nisargadatta Maharaj;...
6806    Secrets Of The Heart Mysticism Khalil Gibran 0...
6807    Fahrenheit 451 Book burning Ray Bradbury 0    ...
6808    The Berlin Phenomenology History Georg Wilhelm...
6809    'I'm Telling You Stories' Literary Criticism H...
Length: 6810, dtype: object

In [None]:
vectorizer = TfidfVectorizer()

feature_vectors = vectorizer.fit_transform(combined_features)

In [None]:
print(feature_vectors)

  (0, 6894)	0.06443750710614472
  (0, 2840)	0.06443750710614472
  (0, 143)	0.06443750710614472
  (0, 5557)	0.06443750710614472
  (0, 7629)	0.06443750710614472
  (0, 6655)	0.06443750710614472
  (0, 102)	0.06443750710614472
  (0, 142)	0.06443750710614472
  (0, 91)	0.06443750710614472
  (0, 141)	0.06443750710614472
  (0, 140)	0.06443750710614472
  (0, 139)	0.06443750710614472
  (0, 103)	0.06443750710614472
  (0, 138)	0.06443750710614472
  (0, 110)	0.06443750710614472
  (0, 98)	0.12887501421228945
  (0, 92)	0.06443750710614472
  (0, 108)	0.06443750710614472
  (0, 112)	0.12887501421228945
  (0, 8035)	0.43700046813706117
  (0, 6013)	0.5885172279797457
  (0, 3422)	0.11189605685054443
  (0, 3882)	0.5885172279797457
  (1, 1822)	0.3013956698681646
  (1, 267)	0.30498953565603815
  :	:
  (6809, 10330)	0.33175135989257093
  (6809, 2240)	0.2206043839379868
  (6809, 9492)	0.30218650531542857
  (6809, 10435)	0.26439634843355025
  (6809, 5684)	0.20438577371569946
  (6809, 9065)	0.21319651041165227
  (6

In [None]:
similarity = cosine_similarity(feature_vectors, feature_vectors)

In [None]:
print(similarity)

[[1.         0.08005311 0.11641123 ... 0.08492469 0.08320896 0.07576596]
 [0.08005311 1.         0.08011208 ... 0.06549297 0.06416982 0.10645452]
 [0.11641123 0.08011208 1.         ... 0.08498725 0.09570742 0.07582177]
 ...
 [0.08492469 0.06549297 0.08498725 ... 1.         0.06807484 0.06198557]
 [0.08320896 0.06416982 0.09570742 ... 0.06807484 1.         0.06073329]
 [0.07576596 0.10645452 0.07582177 ... 0.06198557 0.06073329 1.        ]]


In [None]:
list_of_all_titles = df['title'].tolist()
print(list_of_all_titles)



In [None]:
book_name = input(' Enter your favourite book name : ')

 Enter your favourite book name : angels


In [None]:
find_close_match = difflib.get_close_matches(book_name, list_of_all_titles)
print(find_close_match)

['Angels', 'Strangers', 'Wanderlust']


In [None]:
close_match = find_close_match[0]
index_of_the_book = df[df.title == close_match].index[0]

In [None]:
similarity_score = list(enumerate(similarity[index_of_the_book]))
print(similarity_score)

[(0, 0.10963613430561164), (1, 0.08455016272642801), (2, 0.10971690067318288), (3, 0.32161759886187513), (4, 0.09049087023016844), (5, 0.08416795607204833), (6, 0.11243656785309714), (7, 0.08461437947736322), (8, 0.07231537541063024), (9, 0.09537349712418879), (10, 0.08576990368775125), (11, 0.09689329796019117), (12, 0.09487528739448084), (13, 0.07504306186575796), (14, 0.07889094896584606), (15, 0.09241589863700679), (16, 0.09525217123691912), (17, 0.08137249063546019), (18, 0.10332544058090262), (19, 0.09478197624776963), (20, 0.0940638772755883), (21, 0.090304840082316), (22, 0.0953283977518215), (23, 0.0801846896844007), (24, 0.09160603131879752), (25, 0.08818491494828115), (26, 0.08303539651252427), (27, 0.0864903341420571), (28, 0.11235332131439331), (29, 0.0934530606946781), (30, 0.07731060046043557), (31, 0.09142845811706216), (32, 0.09285806757979212), (33, 0.08208615162398068), (34, 0.10025335578271868), (35, 0.08182948796886716), (36, 0.08074659883287301), (37, 0.0745030298

In [None]:
sorted_similar_books = sorted(similarity_score, key = lambda x:x[1], reverse = True)
print(sorted_similar_books)

[(6114, 1.0), (4758, 0.6275903888815336), (4303, 0.5661659176611622), (2863, 0.5575217894467601), (3023, 0.3622601883856181), (5898, 0.3606157009134968), (6166, 0.3424901329104728), (3964, 0.3350635378010119), (4257, 0.3303062971178746), (5038, 0.32465204201525705), (3, 0.32161759886187513), (1038, 0.30685312480733057), (6377, 0.3006590419797013), (4692, 0.28274261498329567), (652, 0.28036109229017675), (4254, 0.27862531632585175), (4894, 0.2725322029660783), (4253, 0.2675840160375931), (5292, 0.2542507736261825), (4691, 0.25407014835096764), (224, 0.24111174414569345), (3752, 0.24108263068912336), (4282, 0.2352078460300898), (225, 0.23356474338490926), (3681, 0.22706707319494498), (4274, 0.22441427855599252), (5216, 0.22385995795933375), (226, 0.22377193437189946), (4093, 0.22377193437189946), (5766, 0.22354060332259207), (5801, 0.22107918558819653), (4993, 0.2174159356707091), (4996, 0.21645812245273727), (4834, 0.21550470831530438), (6459, 0.21121025882605757), (1794, 0.205704512107

In [None]:
top_sim = sorted_similar_books[:5]
top_sim

[(6114, 1.0),
 (4758, 0.6275903888815336),
 (4303, 0.5661659176611622),
 (2863, 0.5575217894467601),
 (3023, 0.3622601883856181)]

In [None]:
i = 1

for book in sorted_similar_books:
    index = book[0]
    title_from_index = df[df.index==index]['title'].values[0]
    if (i < 6):
        print(i, '-', title_from_index)
        i += 1

1 - Angels
2 - Godslayer
3 - The Sundering
4 - Kushiel's Justice
5 - What Angels Fear


In [None]:
book_name = input(' Enter your favourite book name : ')

list_of_all_titles = df['title'].tolist()

find_close_match = difflib.get_close_matches(book_name, list_of_all_titles)

close_match = find_close_match[0]

index_of_the_book = df[df.title == close_match].index[0]

similarity_score = list(enumerate(similarity[index_of_the_book]))

sorted_similar_books = sorted(similarity_score, key = lambda x:x[1], reverse = True)

print('Books suggested for you : \n')

i = 1

for book in sorted_similar_books:
    index = book[0]
    title_from_index = df[df.index==index]['title'].values[0]
    if (i < 30):
        print(i, '.',title_from_index)
        i+=1


 Enter your favourite book name : wing of fire
Books suggested for you : 

1 . Wings of Fire
2 . Legacy of the Dead
3 . Search the Dark
4 . Watchers of Time
5 . A False Mirror
6 . A Cold Treachery
7 . A Fearsome Doubt
8 . Flap Your Wings
9 . On Wings of Eagles
10 . The Broken Wings
11 . Born in Blood and Fire
12 . The Gate of Fire
13 . Gates Of Fire
14 . A Gift of Wings
15 . River of Blue Fire
16 . The Sixties
17 . Angel Fire East
18 . Women
19 . Wild Fire
20 . Fire in the Sky
21 . Harry Potter and the Goblet of Fire
22 . Four Novels
23 . Fire Watch
24 . Fire Sea
25 . A Christmas Carol
26 . Media Unlimited
27 . The Intellectuals and the Flag
28 . A Christmas Carol
29 . A Christmas Carol
