In [1]:
#Importing all the necessary libraries
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
import os

#For model building
import scipy
import math
import sklearn
from nltk.corpus import stopwords
from sklearn.metrics.pairwise import cosine_similarity # importing consine_similarity score from metrics module of seaborn lib.
from scipy.sparse import csr_matrix
from scipy.sparse.linalg import svds
from sklearn.feature_extraction.text import TfidfVectorizer

from sklearn.neighbors import NearestNeighbors


# This is to supress the warning messages
import warnings
warnings.filterwarnings('ignore') # for ignoring the warnings

In [2]:
from google.colab import drive
drive.mount('/content/drive')

ModuleNotFoundError: No module named 'google'

In [None]:
Books = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/BookRecommendor/Books.csv' , sep=";", on_bad_lines='skip', encoding='latin-1')

In [None]:
Books.head()

In [None]:
Books.shape

In [None]:
Books.columns

In [None]:
Books= Books[['ISBN', 'Book-Title', 'Book-Author', 'Year-Of-Publication', 'Publisher']]

In [None]:
Books.shape

In [None]:
Ratings = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/BookRecommendor/Ratings.csv' , sep=";", on_bad_lines='skip', encoding='latin-1')

In [None]:
Ratings.head()

In [None]:
Ratings.shape

In [None]:
Users = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/BookRecommendor/Users.csv' , sep=";", on_bad_lines='skip', encoding='latin-1')

In [None]:
Users.head()

In [None]:
print(Books.shape)
print(Ratings.shape)
print(Users.shape)

In [None]:
Books.rename(columns={"Book-Title":'Title',
                      'Book-Author':'Author',
                     "Year-Of-Publication":'Year',
                     "Publisher":"Publisher"},inplace=True)

In [None]:
Books.head()

In [None]:
Users.rename(columns={"User-ID":'User_ID'}, inplace=True)

In [None]:
Users.head()

In [None]:
Ratings.rename(columns={"User-ID":'User_ID',
                      'Book-Rating':'Rating'},inplace=True)

In [None]:
Ratings.head()

In [None]:
print(Books.shape, Users.shape, Ratings.shape, sep='\n')

In [None]:
Ratings['User_ID'].value_counts()   #checking the count of ratings by the User_ID

In [None]:
#checking number of unique User IDs
Ratings['User_ID'].value_counts().shape
Ratings['User_ID'].unique().shape

In [None]:
##storing users who had at least rated more than 200 books
x= Ratings['User_ID'].value_counts() > 200

In [None]:
x[x].shape

In [None]:
##storing the  list of User IDs
y= x[x].index
y

In [None]:
Ratings = Ratings[Ratings['User_ID'].isin(y)]

In [None]:
Ratings.head()

In [None]:
##merging the ratings dataframe with books
# Ratings_with_Books = Ratings.merge(Books, on='ISBN', Users on='User_ID')
# Merging the ratings dataframe with books
Ratings_with_Books = Ratings.merge(Books, on='ISBN').merge(Users, on='User_ID')

In [None]:
Ratings_with_Books.head()

In [None]:
Ratings_with_Books.shape

In [None]:
Number_Rating = Ratings_with_Books.groupby('Title')['Rating'].count().reset_index()

In [None]:
Number_Rating.head()

In [None]:
Number_Rating.rename(columns={'Rating':'Number of Ratings'}, inplace=True)

In [None]:
Number_Rating.head()

In [None]:
Number_Rating.shape

In [None]:
Final_Rating= Ratings_with_Books.merge(Number_Rating, on='Title')

In [None]:
Final_Rating.head()

In [None]:
Final_Rating = Final_Rating[Final_Rating['Number of Ratings'] >= 50]

In [None]:
Final_Rating.head()

In [None]:
Final_Rating.shape

In [None]:
Final_Rating.drop_duplicates(['User_ID','Title'],inplace=True)
Final_Rating.shape

In [None]:
Final_Rating.isnull().sum()

In [None]:
Final_Rating.dtypes

**Top-K**

In [None]:
agg_final_rating_df = Final_Rating.groupby('Title').agg(Average_Rating = ('Rating','mean'),Number_of_ratings=('Rating','count')).reset_index()

In [None]:
agg_final_rating_df.head()

In [None]:
def recommend_top_k(n=5):
  top_recommendations = agg_final_rating_df.sort_values(['Average_Rating','Number_of_ratings'],ascending=False).head(10)
  return top_recommendations

In [None]:
recommended_top_10 = recommend_top_k(10)

In [None]:
print("Your Top 10 Recommendations : \n")
recommended_top_10.head(10)

In [None]:
# Collaborative Filtering
def collaborative_filtering(User_id,Final_Rating, top_n=5):
    user_item_matrix = Final_Rating.pivot(index="User_ID", columns="ISBN", values="Rating").fillna(0)
    sparse_matrix = csr_matrix(user_item_matrix.values)

    model_knn = NearestNeighbors(metric="cosine", algorithm="brute")
    model_knn.fit(sparse_matrix)

    user_vector = user_item_matrix.loc[User_id].values.reshape(1, -1)
    distances, indices = model_knn.kneighbors(user_vector, n_neighbors=top_n + 1)

    recommended_book_ids = user_item_matrix.iloc[indices.flatten()[1:]].mean(axis=0).nlargest(top_n).index
    recommended_books = Final_Rating[Final_Rating["ISBN"].isin(recommended_book_ids)]

    return recommended_books[["Title", "Author"]]

In [None]:
collab_rec = collaborative_filtering(277427, Final_Rating, top_n=10)
print("Collaborative Filtering Recommendations:\n", collab_rec.drop_duplicates())

In [None]:
# Content-Based Filtering
def content_based_recommendation(book_title, books_df, top_n=5):
    tfidf = TfidfVectorizer(stop_words="english")
    Final_Rating["Title"] = Final_Rating["Title"].fillna("")
    content_based_final_df = Final_Rating.drop_duplicates(subset="Title")
    tfidf_matrix = tfidf.fit_transform(content_based_final_df["Title"])

    cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)
    indices = pd.Series(content_based_final_df.index, index=content_based_final_df["Title"]).drop_duplicates()

    idx = indices[book_title]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:top_n + 1]
    book_indices = [i[0] for i in sim_scores]

    return content_based_final_df.iloc[book_indices][["Title", "Author"]]

In [None]:
content_rec = content_based_recommendation("Girl in Hyacinth Blue", Final_Rating, top_n=5)
print("Content-Based Recommendations:\n", content_rec.drop_duplicates())