# What is a content-based recommender?


In [5]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [31]:
# Load the catalogue; plug in your own CSV of products, movies or books here.
# The file must provide the 'book_id' and 'title' columns used below.
ds = pd.read_csv("../data/books.csv")

title_column = 'title'
TOP_N = 5  # number of similar books stored per book; change as per your needs

# min_df=1 keeps every term that occurs in at least one title. The integer
# value 0 selected the same vocabulary, but recent scikit-learn releases
# reject it during parameter validation.
tf = TfidfVectorizer(analyzer='word', ngram_range=(1, 3), min_df=1, stop_words='english')

# Missing titles become empty strings so the vectorizer does not fail on NaN.
tfidf_matrix = tf.fit_transform(ds[title_column].fillna(''))
# Square matrix: entry [a, b] is the cosine similarity of title a and title b
# (0 means no shared terms, 1 means identical term weights).
cosine_similarities = cosine_similarity(tfidf_matrix, tfidf_matrix)

# Rows of the similarity matrix are positional, so pair them with book ids by
# position rather than by the frame's index labels.
book_ids = ds['book_id'].tolist()

# results maps book_id -> [(score, other_book_id), ...], most similar first.
results = {}

for pos, book_id in enumerate(book_ids):
    scores = cosine_similarities[pos]
    # argsort is ascending, so reverse it. One extra candidate is taken because
    # the book itself normally ranks first and is removed explicitly below.
    candidates = scores.argsort()[::-1][:TOP_N + 1]
    neighbours = [i for i in candidates if i != pos][:TOP_N]
    results[book_id] = [(scores[i], book_ids[i]) for i in neighbours]
    
def item(id):
    """Return the title of the book whose ``book_id`` equals ``id``.

    The lookup runs against the global ``ds`` frame: rows with a matching
    ``book_id`` are selected, their ``title`` values are collected into a
    list, and the first one is returned.

    Raises
    ------
    IndexError
        If no row of ``ds`` has the requested ``book_id``.
    """
    matches = ds['book_id'] == id
    titles = ds.loc[matches, 'title'].tolist()
    return titles[0]

def recommend(id, num):
    """Print up to ``num`` stored recommendations for the book ``id``.

    Parameters
    ----------
    id
        ``book_id`` of the book to recommend for. Its title is resolved with
        ``item`` and its neighbours are read from the global ``results`` dict.
    num
        Maximum number of recommendations to print.

    Returns
    -------
    None
        Everything is written to stdout.

    Notes
    -----
    * A non-positive ``num`` prints the "unable to recommend" message and
      stops; nothing else is printed.
    * The header reports the number of books actually listed, which can be
      smaller than ``num`` when fewer neighbours are stored.
    * Errors raised by ``item`` or by the ``results`` lookup for an unknown
      ``id`` are not caught here.
    """
    if num <= 0:
        # Guard first: a negative num would otherwise slice from the end of
        # the stored list and print a nonsensical header.
        print("Unable to recommend any book as you have not chosen the number of book to be recommended")
        return

    title = item(id)
    recs = results[id][:num]
    if not recs:
        print("No similar books found for " + title)
        return

    count = len(recs)
    noun = " book" if count == 1 else " books"
    print("Recommending " + str(count) + noun + " similar to " + title)
    print("----------------------------------------------------------")
    for score, book_id in recs:
        print("You may also like to read: " + item(book_id) + " (score:" + str(score) + ")")

# Arguments: the book_id to look up, then how many recommendations to print.
recommend(id=274, num=2)

Recommending 2 books similar to The Godfather
----------------------------------------------------------
You may also like to read: Desiring God: Meditations of a Christian Hedonist (score:0.0)
You may also like to read: Consider the Lobster and Other Essays (score:0.0)


In [30]:
def search_by_title(title, regex=True):
    """Return the ``title`` and ``book_id`` of books whose title contains ``title``.

    Parameters
    ----------
    title : str
        Text to look for. Matching ignores case on both sides, so
        ``'Godfather'`` and ``'godfather'`` find the same rows.
    regex : bool, default True
        When True, ``title`` is treated as a regular expression (the pandas
        default for ``str.contains``). Pass False to match it literally,
        e.g. for queries containing characters such as ``+`` or ``(``.

    Returns
    -------
    pandas.DataFrame
        The matching rows of the global ``ds`` frame, restricted to the
        ``title`` and ``book_id`` columns. Empty when nothing matches.
    """
    # na=False treats rows without a title as non-matching, which keeps the
    # mask strictly boolean so it can be used with .loc.
    matches = ds['title'].str.contains(title, case=False, regex=regex, na=False)
    return ds.loc[matches, ['title', 'book_id']]

# Find the book_id that belongs to a title fragment.
search_by_title(title='godfather')

Unnamed: 0,title,book_id
273,The Godfather,274


https://www.kaggle.com/zygmunt/goodbooks-10k#tags.csv

https://www.kdnuggets.com/2019/11/content-based-recommender-using-natural-language-processing-nlp.html


https://towardsdatascience.com/how-to-build-from-scratch-a-content-based-movie-recommender-with-natural-language-processing-25ad400eb243

https://towardsdatascience.com/my-journey-to-building-book-recommendation-system-5ec959c41847


https://www.linkedin.com/pulse/content-based-recommender-engine-under-hood-venkat-raman/