In [1]:
import pandas as pd

book_info_df = pd.read_json('book_info_df.json')

In [2]:
from sklearn.feature_extraction.text import TfidfVectorizer

vectorizer = TfidfVectorizer()
tfidf = vectorizer.fit_transform(book_info_df["mod_title"])

In [3]:
print(type(tfidf))
print(tfidf.shape)

<class 'scipy.sparse.csr.csr_matrix'>
(750505, 151922)


In [4]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import re

def make_clickable(val):
    return '<a target="_blank" href="{}">Goodreads</a>'.format(val, val)

def show_image(val):
    return '<a href="{}"><img src="{}" width=50></img></a>'.format(val, val)

# Use cosine similarity for search engine
def search(query, print_flag=0):
    processed = re.sub("[^a-zA-Z0-9 ]", "", query.lower())
    query_vec = vectorizer.transform([query])
    similarity = cosine_similarity(query_vec, tfidf).flatten()
    indices = np.argpartition(similarity, -10)[-10:]
    results = book_info_df.iloc[indices]
    results = results.sort_values("num_ratings", ascending=False)

    key_info = ["book_id", "title", "num_ratings", "average_rating", "num_pages", "publication_year"]
    if print_flag:
        counter = 0
        rsp_list = []
        for index, row in results.iterrows():
            counter += 1
            rsp_list.append("Result " + str(counter) +
                            "\nbook_id: " + str(row['book_id']) +
                            "\ntitle: " + row['title'] +
                            "\nnum_ratings: " + str(row['num_ratings']) +
                            "\naverage_rating: " + str(row['average_rating']) +
                            "\nnum_pages: " + str(row['num_pages']) +
                            "\npublication_year: " + str(row['publication_year']) + "\n")
        rsp = ''.join(rsp_list)
        print(rsp)
    return results.head(5).style.format({'url': make_clickable, 'cover_image': show_image})

In [5]:
search("life of pi")

Unnamed: 0,book_id,title,num_ratings,url,cover_image,language_code,average_rating,publisher,num_pages,publication_year,mod_title
106514,11123032,Life of Pi,3618,Goodreads,,eng,3.88,,482.0,,life of pi
603464,826373,Life of Pi,690,Goodreads,,eng,3.88,canongate books,432.0,2004.0,life of pi
674807,6375914,Life of Pi,661,Goodreads,,eng,3.88,canongate books,434.0,2009.0,life of pi
702809,820602,Life of Pi,385,Goodreads,,eng,3.88,highbridge company,12.0,2003.0,life of pi
379848,15939146,Life of Pi,312,Goodreads,,,3.88,canongate books,428.0,2012.0,life of pi


In [6]:
search("sherlock homes")

Unnamed: 0,book_id,title,num_ratings,url,cover_image,language_code,average_rating,publisher,num_pages,publication_year,mod_title
55891,20662423,Sherlock: Chronicles,702,Goodreads,,eng,4.49,bbc books,320.0,2014.0,sherlock chronicles
225608,16135705,The Greatest Adventures of Sherlock Homes,565,Goodreads,,eng,4.41,fall river press,939.0,2012.0,the greatest adventures of sherlock homes
124590,353369,Two Homes,361,Goodreads,,eng,4.22,candlewick press,40.0,2003.0,two homes
420536,13603899,Sherlock Homes A Study in Scarlet and The Red Headed League,115,Goodreads,,eng,4.15,wonder books,64.0,,sherlock homes a study in scarlet and the red headed league
322593,355856,Houses and Homes,98,Goodreads,,,3.84,harpercollins,32.0,1995.0,houses and homes


In [7]:
search("man")

Unnamed: 0,book_id,title,num_ratings,url,cover_image,language_code,average_rating,publisher,num_pages,publication_year,mod_title
752460,82895,A Man,1478,Goodreads,,eng,4.29,simon schuster,463.0,1980.0,a man
196767,551932,The Man,1058,Goodreads,,eng,4.05,,,,the man
551449,6677462,"Man To Man (Heroes, #1)",778,Goodreads,,eng,3.77,phaze books,197.0,2009.0,man to man heroes 1
133022,19442593,"Man to Man (Hero, #1)",189,Goodreads,,eng,3.77,,,,man to man hero 1
674491,52023,Between Man and Man,177,Goodreads,,eng,4.17,routledge,290.0,2002.0,between man and man


In [8]:
search("great expectations")

Unnamed: 0,book_id,title,num_ratings,url,cover_image,language_code,average_rating,publisher,num_pages,publication_year,mod_title
670302,2623,Great Expectations,468462,Goodreads,,eng,3.75,oxford university press,505.0,1998.0,great expectations
549179,896853,Great Expectations,1067,Goodreads,,eng,3.75,wordsworth,423.0,2007.0,great expectations
18955,97720,Great Expectations,551,Goodreads,,,3.75,brilliance audio,518.0,2002.0,great expectations
670308,2625,Great Expectations,169,Goodreads,,,3.75,,,,great expectations
670307,2624,Great Expectations,132,Goodreads,,,3.75,brilliance audio,0.0,2005.0,great expectations


In [9]:
search("pachinko")

Unnamed: 0,book_id,title,num_ratings,url,cover_image,language_code,average_rating,publisher,num_pages,publication_year,mod_title
251199,22839919,Larger Than Life,8606,Goodreads,,eng,3.94,,,,larger than life
612263,29983711,Pachinko,8161,Goodreads,,eng,4.18,grand central publishing,496.0,2017.0,pachinko
251203,19344912,"Gold (All that Glitters, #2)",1433,Goodreads,,eng,3.99,,417.0,2015.0,gold all that glitters 2
626090,32619967,Pachinko,1361,Goodreads,,eng,4.18,,490.0,,pachinko
251200,25613902,The Bitter Side of Sweet,1148,Goodreads,,eng,4.24,gp putnams sons books for young readers,336.0,2016.0,the bitter side of sweet


In [10]:
search("tokyo", 1)

Result 1
book_id: 22447379
title: 東京喰種トーキョーグール 4 [Tokyo Guru 4] (Tokyo Ghoul, #4)
num_ratings: 4556
average_rating: 4.44
num_pages: 192.0
publication_year: 2012.0
Result 2
book_id: 22447397
title: 東京喰種トーキョーグール 6 [Tokyo Guru 6] (Tokyo Ghoul, #6)
num_ratings: 4489
average_rating: 4.49
num_pages: 200.0
publication_year: 2013.0
Result 3
book_id: 22447384
title: 東京喰種トーキョーグール 5 [Tokyo Guru 5] (Tokyo Ghoul, #5)
num_ratings: 3957
average_rating: 4.43
num_pages: 200.0
publication_year: 2012.0
Result 4
book_id: 22447402
title: 東京喰種トーキョーグール 7 [Tokyo Guru 7] (Tokyo Ghoul, #7)
num_ratings: 3576
average_rating: 4.58
num_pages: 200.0
publication_year: 2013.0
Result 5
book_id: 755204
title: Tokyo Year Zero (Tokyo Trilogy, #1)
num_ratings: 1076
average_rating: 3.2800000000000002
num_pages: 355.0
publication_year: 2007.0
Result 6
book_id: 647949
title: Tokyo
num_ratings: 717
average_rating: 3.92
num_pages: 464.0
publication_year: 2005.0
Result 7
book_id: 26056757
title: Tokyo Ghost #1
num_ratings: 94
av

Unnamed: 0,book_id,title,num_ratings,url,cover_image,language_code,average_rating,publisher,num_pages,publication_year,mod_title
728819,22447379,"東京喰種トーキョーグール 4 [Tokyo Guru 4] (Tokyo Ghoul, #4)",4556,Goodreads,,jpn,4.44,ji ying she,192.0,2012.0,4 tokyo guru 4 tokyo ghoul 4
367983,22447397,"東京喰種トーキョーグール 6 [Tokyo Guru 6] (Tokyo Ghoul, #6)",4489,Goodreads,,jpn,4.49,ji ying she,200.0,2013.0,6 tokyo guru 6 tokyo ghoul 6
189507,22447384,"東京喰種トーキョーグール 5 [Tokyo Guru 5] (Tokyo Ghoul, #5)",3957,Goodreads,,jpn,4.43,ji ying she,200.0,2012.0,5 tokyo guru 5 tokyo ghoul 5
420130,22447402,"東京喰種トーキョーグール 7 [Tokyo Guru 7] (Tokyo Ghoul, #7)",3576,Goodreads,,jpn,4.58,ji ying she,200.0,2013.0,7 tokyo guru 7 tokyo ghoul 7
343855,755204,"Tokyo Year Zero (Tokyo Trilogy, #1)",1076,Goodreads,,,3.28,knopf,355.0,2007.0,tokyo year zero tokyo trilogy 1


In [11]:
search("lord of the rings", 1)

Result 1
book_id: 11047557
title: The Lord of the Rings
num_ratings: 2062
average_rating: 4.48
num_pages: 1209.0
publication_year: 2009.0
Result 2
book_id: 899773
title: The Lord of the Rings (The Lord of the Rings, #1-3)
num_ratings: 598
average_rating: 4.48
num_pages: 1077.0
publication_year: 1968.0
Result 3
book_id: 15357
title: The Lord of the Rings (The Lord of the Rings, #1-3)
num_ratings: 502
average_rating: 4.48
num_pages: 1200.0
publication_year: 1991.0
Result 4
book_id: 827764
title: The Lord of the Rings (The Lord of the Rings, #1-3)
num_ratings: 464
average_rating: 4.48
num_pages: 1178.0
publication_year: 1994.0
Result 5
book_id: 516970
title: The Lord of the Rings (The Lord of the Rings, #1-3)
num_ratings: 366
average_rating: 4.48
num_pages: 1147.0
publication_year: 2002.0
Result 6
book_id: 877732
title: The Lord of the Rings (The Lord of the Rings, #1-3)
num_ratings: 293
average_rating: 4.48
num_pages: 1168.0
publication_year: 2002.0
Result 7
book_id: 39
title: The Lord o

Unnamed: 0,book_id,title,num_ratings,url,cover_image,language_code,average_rating,publisher,num_pages,publication_year,mod_title
87665,11047557,The Lord of the Rings,2062,Goodreads,,eng,4.48,harpercollins,1209.0,2009.0,the lord of the rings
233069,899773,"The Lord of the Rings (The Lord of the Rings, #1-3)",598,Goodreads,,eng,4.48,george allen unwin,1077.0,1968.0,the lord of the rings the lord of the rings 13
698640,15357,"The Lord of the Rings (The Lord of the Rings, #1-3)",502,Goodreads,,eng,4.48,harper collins publishers,1200.0,1991.0,the lord of the rings the lord of the rings 13
244079,827764,"The Lord of the Rings (The Lord of the Rings, #1-3)",464,Goodreads,,eng,4.48,harpercollins,1178.0,1994.0,the lord of the rings the lord of the rings 13
302402,516970,"The Lord of the Rings (The Lord of the Rings, #1-3)",366,Goodreads,,eng,4.48,mariner books,1147.0,2002.0,the lord of the rings the lord of the rings 13


In [12]:
search("dragonball")

Unnamed: 0,book_id,title,num_ratings,url,cover_image,language_code,average_rating,publisher,num_pages,publication_year,mod_title
251203,19344912,"Gold (All that Glitters, #2)",1433,Goodreads,,eng,3.99,,417.0,2015.0,gold all that glitters 2
251205,17925203,Asylum (Asylum #1),883,Goodreads,,eng,3.64,harper childrens,320.0,2013.0,asylum asylum 1
251206,33016708,Dirty Quarterback,329,Goodreads,,eng,3.26,,,,dirty quarterback
246354,2065487,"Dragonball Vol. 1 (Dragon Ball, #1)",184,Goodreads,,jpn,4.33,shueisha,214.0,2002.0,dragonball vol 1 dragon ball 1
251202,25652874,Bear West (Ranch Romeos #1),168,Goodreads,,eng,3.84,,,,bear west ranch romeos 1
