# Content-based recommender system for books

In [75]:
import pandas as pd
from rake_nltk import Rake
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer

# Let pandas display up to 100 columns before eliding any of them.
pd.options.display.max_columns = 100

# Read the book catalogue, decoding the file as ISO-8859-1.
df = pd.read_csv('Book1.csv', encoding="ISO-8859-1")
df.head()

Unnamed: 0,Title,Genre,Author,Description
0,Her Last Wish,Fiction,Pandey Ajay K.,Her Last Wish is an inspiring tale of love and...
1,Srinagar Conspiracy,Fiction,Vikram Chandra,"Jalauddin and his men are back in India, and w..."
2,The Book of Five Rings,Comics,Sean Michael Wilson,This graphic novel version of The Book of Five...
3,The Cosmopolitan Novel,Art,Schoene Berthold,While traditionally the novel has been seen as...
4,Spider-Man and Philosophy,Literary Criticism,Jonathan J. Sanford,Untangle the complex web of philosophical dile...


In [76]:
# Display the frame's dimensions as (rows, columns).
df.shape

(6, 4)

In [77]:
# Print the frame's dimensions, then preview its first rows.
print(df.shape)
df.head()

(6, 4)


Unnamed: 0,Title,Genre,Author,Description
0,Her Last Wish,Fiction,Pandey Ajay K.,Her Last Wish is an inspiring tale of love and...
1,Srinagar Conspiracy,Fiction,Vikram Chandra,"Jalauddin and his men are back in India, and w..."
2,The Book of Five Rings,Comics,Sean Michael Wilson,This graphic novel version of The Book of Five...
3,The Cosmopolitan Novel,Art,Schoene Berthold,While traditionally the novel has been seen as...
4,Spider-Man and Philosophy,Literary Criticism,Jonathan J. Sanford,Untangle the complex web of philosophical dile...


In [78]:
# Make the book title the row index.
df.set_index('Title', inplace=True)

# Break the Genre and Author strings into lists of space-separated tokens.
for column in ('Genre', 'Author'):
    df[column] = df[column].map(lambda text: text.split(' '))

In [79]:
# Collapse each token list into a single lower-case string with no spaces,
# e.g. ['Vikram', 'Chandra'] -> 'vikramchandra'.
# Whole columns are assigned instead of writing to the rows yielded by
# df.iterrows(): those rows may be copies of the data, in which case the
# writes never reach the DataFrame.
for column in ('Author', 'Genre'):
    df[column] = df[column].map(lambda tokens: ''.join(tokens).lower())

In [80]:
# Preview the first rows of df.
df.head()

Unnamed: 0_level_0,Genre,Author,Description
Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Her Last Wish,fiction,pandeyajayk.,Her Last Wish is an inspiring tale of love and...
Srinagar Conspiracy,fiction,vikramchandra,"Jalauddin and his men are back in India, and w..."
The Book of Five Rings,comics,seanmichaelwilson,This graphic novel version of The Book of Five...
The Cosmopolitan Novel,art,schoeneberthold,While traditionally the novel has been seen as...
Spider-Man and Philosophy,literarycriticism,jonathanj.sanford,Untangle the complex web of philosophical dile...


In [81]:
def _description_keywords(text):
    """Return the list of RAKE key words found in ``text``.

    A fresh Rake instance is used per call; by default it uses the English
    stopwords from NLTK and discards all punctuation characters.
    """
    r = Rake()

    # extracting the words by passing the text
    r.extract_keywords_from_text(text)

    # the dictionary maps each key word to its degree score; only the words
    # themselves are kept
    return list(r.get_word_degrees().keys())


# Build the Key_words column in one assignment. Writing to the rows yielded
# by df.iterrows() is not reliable: those rows may be copies, in which case
# the column would silently keep its initial value.
df['Key_words'] = df['Description'].map(_description_keywords)

# dropping the Description column, which is no longer needed
df.drop(columns = ['Description'], inplace = True)

In [82]:
# Preview the first rows of df.
df.head()

Unnamed: 0_level_0,Genre,Author,Key_words
Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Her Last Wish,fiction,pandeyajayk.,"[girlfriend, aastha, enters, upon, father, als..."
Srinagar Conspiracy,fiction,vikramchandra,"[bring, india, barely, three, months, visit, p..."
The Book of Five Rings,comics,seanmichaelwilson,"[five, rings, musashi, iconic, book, famed, se..."
The Cosmopolitan Novel,art,schoeneberthold,"[community, creative, world, new, sub, novel, ..."
Spider-Man and Philosophy,literarycriticism,jonathanj.sanford,"[blame, man, beloved, superhero, world, everyo..."


In [83]:
# Snapshot the feature columns before the new column exists, so that
# bag_of_words is never folded into itself.
feature_columns = list(df.columns)


def _row_to_bag(row):
    """Join every feature of one row into a single space-separated string.

    Key_words holds a list of words and is flattened; every other column is
    expected to hold a plain string already.
    """
    parts = []
    for col in feature_columns:
        value = row[col]
        parts.append(' '.join(value) if col == 'Key_words' else value)
    return ' '.join(parts)


# Rows from df.iterrows() are only read here. The result is assigned as a
# whole column because writes made to those row objects may land on copies
# and never reach the DataFrame.
df['bag_of_words'] = [_row_to_bag(row) for _, row in df.iterrows()]

# Keep only the combined text column.
df.drop(columns = feature_columns, inplace = True)

In [84]:
# Preview the first rows of df.
df.head()

Unnamed: 0_level_0,bag_of_words
Title,Unnamed: 1_level_1
Her Last Wish,fiction pandeyajayk. girlfriend aastha enters ...
Srinagar Conspiracy,fiction vikramchandra bring india barely three...
The Book of Five Rings,comics seanmichaelwilson five rings musashi ic...
The Cosmopolitan Novel,art schoeneberthold community creative world n...
Spider-Man and Philosophy,literarycriticism jonathanj.sanford blame man ...


In [85]:
# Build the vectorizer and turn each book's bag_of_words text into a row of
# token counts.
count = CountVectorizer()
count_matrix = count.fit_transform(df['bag_of_words'])

# Series of book titles on a default 0..n-1 index; used later to translate
# between a title and its position.
indices = pd.Series(df.index)
indices.head(5)

0                Her Last Wish
1          Srinagar Conspiracy
2       The Book of Five Rings
3       The Cosmopolitan Novel
4    Spider-Man and Philosophy
Name: Title, dtype: object

In [86]:
#print(count_matrix)

In [87]:
# generating the cosine similarity matrix
cosine_sim = cosine_similarity(count_matrix, count_matrix)
cosine_sim

array([[ 1.        ,  0.08280787,  0.        ,  0.01354571,  0.04101516,
         0.06546537],
       [ 0.08280787,  1.        ,  0.02760262,  0.05608456,  0.05660629,
         0.01505847],
       [ 0.        ,  0.02760262,  1.        ,  0.04063713,  0.06835859,
         0.02182179],
       [ 0.01354571,  0.05608456,  0.04063713,  1.        ,  0.03703862,
         0.02955916],
       [ 0.04101516,  0.05660629,  0.06835859,  0.03703862,  1.        ,
         0.0447512 ],
       [ 0.06546537,  0.01505847,  0.02182179,  0.02955916,  0.0447512 ,
         1.        ]])

In [88]:
# function that takes in book title as input and returns the top recommended books
def recommendations(title, cosine_sim = cosine_sim):
    
    recommended_movies = []
    
    # gettin the index of the book that matches the title
    idx = indices[indices == title].index[0]

    # creating a Series with the similarity scores in descending order
    score_series = pd.Series(cosine_sim[idx]).sort_values(ascending = False)

    # getting the indexes of the most similar books
    top_indexes = list(score_series.iloc[1:6].index)
    
    # populating the list with the titles of the best books
    for i in top_indexes:
        recommended_movies.append(list(df.index)[i])
        
    return recommended_movies

In [89]:
recommendations('Her Last Wish')

['Srinagar Conspiracy',
 'Be My Perfect Ending',
 'Spider-Man and Philosophy',
 'The Cosmopolitan Novel',
 'The Book of Five Rings']

In [90]:
# Show up to the first 10 rows of df.
df.head(10)

Unnamed: 0_level_0,bag_of_words
Title,Unnamed: 1_level_1
Her Last Wish,fiction pandeyajayk. girlfriend aastha enters ...
Srinagar Conspiracy,fiction vikramchandra bring india barely three...
The Book of Five Rings,comics seanmichaelwilson five rings musashi ic...
The Cosmopolitan Novel,art schoeneberthold community creative world n...
Spider-Man and Philosophy,literarycriticism jonathanj.sanford blame man ...
Be My Perfect Ending,romance arpitvageria everyone survive ups succ...
