### Problem statement: Recommend the best books based on the author.

#dataset: books.csv

In [1]:
#Importing necessary libraries

import os

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

In [2]:
#Reading dataset by using pandas library

books=pd.read_csv('C:\\Users\\Raja\\Downloads\\assignments\\recommender\\books.csv',encoding="ISO-8859-1")

In [3]:
# Inspect the dataset dimensions as a (rows, columns) tuple.

books.shape 

(5000, 6)

In [4]:
# List the column names of the dataset.

books.columns

Index(['sn', 'users', 'Book_Title', 'Book_Author', 'Publisher', 'ratings'], dtype='object')

In [5]:
# Preview the first five records.

books.head()

Unnamed: 0,sn,users,Book_Title,Book_Author,Publisher,ratings
0,1,1,Classical Mythology,Mark P. O. Morford,Oxford University Press,0
1,2,2,Clara Callan,Richard Bruce Wright,HarperFlamingo Canada,5
2,3,3,Decision in Normandy,Carlo D'Este,HarperPerennial,0
3,4,4,Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata,Farrar Straus Giroux,3
4,5,5,The Mummies of Urumchi,E. J. W. Barber,W. W. Norton &amp; Company,6


In [6]:
# Build TF-IDF features from the author names.
# - analyzer='word', ngram_range=(1, 3): use word unigrams, bigrams and trigrams.
# - stop_words='english': drop scikit-learn's built-in English stop words.
# - min_df=1: keep every term that occurs in at least one document. An integer
#   min_df of 0 is rejected by parameter validation in recent scikit-learn
#   releases, and 1 selects the same vocabulary on older ones.
tf = TfidfVectorizer(analyzer='word', ngram_range=(1, 3), min_df=1, stop_words='english')

# Missing authors are replaced with an empty string, because the vectorizer
# raises on NaN documents; an empty document simply yields an all-zero row.
tfidf_matrix = tf.fit_transform(books['Book_Author'].fillna(''))

In [7]:
# Calculate the cosine similarity between every pair of books.
# Cosine similarity is a measure of similarity between two non-zero vectors.
# TfidfVectorizer L2-normalises each row by default, so the plain dot product
# computed by linear_kernel equals the cosine similarity.
cosine_similarities = linear_kernel(tfidf_matrix, tfidf_matrix)

# Work with positions throughout: row `pos` of the similarity matrix
# corresponds to the book at position `pos` in `books`, whatever its index label.
sn_values = books['sn'].to_numpy()

# results maps each book's `sn` to a list of (score, other_sn) tuples,
# ordered from most to least similar.
results = {}
for pos, sn in enumerate(sn_values):
    scores = cosine_similarities[pos]
    # Descending order of similarity, for any number of rows.
    ranked_positions = scores.argsort()[::-1]
    # Exclude the book itself explicitly: when other books tie at the top
    # score (e.g. same author), the book is not guaranteed to be ranked first.
    results[sn] = [(scores[i], sn_values[i]) for i in ranked_positions if i != pos]

print('done!')

done!


In [8]:
# Recommendation model: lookup helper

def item(sn):
    """Return the author of the first row in ``books`` whose ``sn`` equals ``sn``.

    Only the part of the author string before the first ' - ' separator is
    returned. Raises IndexError when no row matches.
    """
    matching_authors = books.loc[books['sn'] == sn]['Book_Author'].tolist()
    first_author = matching_authors[0]
    author_head, _, _ = first_author.partition(' - ')
    return author_head

In [9]:
def recommend(item_sn, num):
    """Print up to ``num`` precomputed recommendations for the book ``item_sn``.

    The recommendations are read from the module-level ``results``
    dictionary; nothing is recomputed here. A header line is printed first,
    then one line per recommendation showing the author (via ``item``) and
    the similarity score.
    """
    header = "".join(
        ["Recommending ", str(num), " products similar to ", item(item_sn), "..."]
    )
    print(header)
    print("-------")
    for score, rec_sn in results[item_sn][:num]:
        line = "".join(["Recommended: ", item(rec_sn), " (score:", str(score), ")"])
        print(line)

# Example: print the first 5 recommendations stored for the book with sn == 11.
recommend(item_sn=11, num=5)

Recommending 5 products similar to David Adams Richards...
-------
Recommended: Susan Richards Shreve (score:0.18787952448147047)
Recommended: Douglas Adams (score:0.17592841838925802)
Recommended: Douglas Adams (score:0.17592841838925802)
Recommended: Douglas Adams (score:0.17592841838925802)
Recommended: Douglas Adams (score:0.17592841838925802)


----------------------------------------End Part -1 ------------------------------------------------------------------------