# Recommendation System

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
## to ignore warnings
import warnings
warnings.filterwarnings('ignore')

In [None]:
## load the three raw tables (ratings, users, books) from the local archive folder
ratings = pd.read_csv('archive/Ratings.csv')
users = pd.read_csv('archive/Users.csv')
books = pd.read_csv('archive/Books.csv')

In [None]:
books.head()

In [None]:
users.head()

In [None]:
ratings.head()

In [None]:
books.shape

In [None]:
users.shape

In [None]:
ratings.shape

In [None]:
### check for null data
books.isnull().sum()

In [None]:
books = books.dropna()

In [None]:
books.isnull().sum()

In [None]:
users.isnull().sum()

In [None]:
users = users.dropna()

In [None]:
users.isnull().sum()

In [None]:
ratings.isnull().sum()

In [None]:
### Check for duplicated rows
books.duplicated().sum()

In [None]:
users.duplicated().sum()

In [None]:
ratings.duplicated().sum()

In [None]:
np.sort(ratings['Book-Rating'].unique())

In [None]:
books.info()

In [None]:
## store the publication year as a 32-bit integer column
books = books.astype({'Year-Of-Publication': 'int32'})

In [None]:
books.info()

## Popularity based recommender system

In [None]:
## attach the book columns to every rating by joining the two tables on ISBN
books_with_ratings = pd.merge(ratings, books, on='ISBN')

In [None]:
books_with_ratings.head()

In [None]:
books_with_ratings.shape

In [None]:
## per book title: number of ratings received and their mean value
popular_df = (
    books_with_ratings
    .groupby('Book-Title')['Book-Rating']
    .agg(num_rating='count', avg_rating='mean')
)

In [None]:
popular_df = popular_df.reset_index()

In [None]:
popular_df

In [None]:
popular_df.sort_values('num_rating',ascending=False)

In [None]:
## a title is considered popular when it has more than 300 ratings;
## the remaining titles are ordered from highest to lowest average rating
popular_df = (
    popular_df
    .query('num_rating > 300')
    .sort_values(by='avg_rating', ascending=False)
)

In [None]:
popular_df = popular_df.head(50)
popular_df

In [None]:
## the deployed model needs title, author and image URL next to the rating stats,
## with a single row kept per title
popular_df = (
    popular_df
    .merge(books, on='Book-Title')
    .drop_duplicates(subset='Book-Title')
    .loc[:, ['Book-Title', 'Book-Author', 'Image-URL-M', 'num_rating', 'avg_rating']]
)

In [None]:
popular_df

In [None]:
### collaborative filtering
#### similar book prediction based on user feedback

### books_with_ratings.head()

In [None]:
## grouping based on user-id tells the no of books rated by each user
x = books_with_ratings.groupby('User-ID').count()
x

In [None]:
x.index

In [None]:
x.shape

In [None]:
## flag (True/False per User-ID) the users with more than 200 rated books
x = x['Book-Rating'].gt(200)
x

In [None]:
## keep only the User-IDs whose flag is True
power_users = x.loc[x].index
power_users

In [None]:
## keep the rating rows whose User-ID belongs to power_users
filtered_ratings = books_with_ratings.loc[books_with_ratings['User-ID'].isin(power_users)]

In [None]:
filtered_ratings

In [None]:
### count, for every book title, how many rows it has in filtered_ratings
y = filtered_ratings.groupby(by='Book-Title').count()
y

In [None]:
y.sort_values('User-ID',ascending=False)

In [None]:
## flag (True/False per title) the books with a User-ID count above 50
y = y['User-ID'].gt(50)
y

In [None]:
## keep only the titles whose flag is True
famous_books = y.loc[y].index
famous_books

In [None]:
## restrict filtered_ratings to the titles listed in famous_books
final_rating = filtered_ratings.loc[filtered_ratings['Book-Title'].isin(famous_books)]
final_rating

In [None]:
## book-by-user matrix: one row per title, one column per user, cells hold the rating
## (pivot_table's default aggregation averages repeated title/user pairs)
pt = pd.pivot_table(
    final_rating,
    values='Book-Rating',
    index='Book-Title',
    columns='User-ID',
)
pt

In [None]:
pt = pt.fillna(0)

In [None]:
## pairwise cosine similarity between the rows of pt (one row per book title),
## so entry [i][j] compares the book at pt.index[i] with the book at pt.index[j]
similiarity_scores = cosine_similarity(pt)
similiarity_scores

In [None]:
df_temp = pd.DataFrame(similiarity_scores)
df_temp

In [None]:
## NOTE: plain assignment, not a copy -- df_temp_name and df_temp refer to the
## same DataFrame, so relabeling one also relabels the other
df_temp_name = df_temp

In [None]:
df_temp_name.index = pt.index
df_temp_name.columns = pt.index

df_temp_name

In [None]:
def recommend(book_name, top_n=5):
    """Return the books most similar to *book_name*.

    Similarity is read from the precomputed ``similiarity_scores`` matrix,
    whose rows are looked up by the position of the title in ``pt.index``.

    Args:
        book_name: Title to look up in ``pt.index``.
        top_n: Maximum number of recommendations to return (default 5).

    Returns:
        A list with one ``[title, author, image_url]`` entry per recommended
        book, ordered from most to least similar.

    Raises:
        IndexError: If *book_name* is not present in ``pt.index``.
    """
    matches = np.where(pt.index == book_name)[0]
    if matches.size == 0:
        raise IndexError(f"book {book_name!r} is not in the similarity matrix")
    index = matches[0]

    # Drop the queried book by position rather than assuming it sorts first:
    # with tied scores (or an all-zero similarity row) another book can be
    # ranked ahead of it, and the queried book would then be recommended.
    candidates = [
        (position, score)
        for position, score in enumerate(similiarity_scores[index])
        if position != index
    ]
    # sort() is stable, so equally similar books keep their pt.index order.
    candidates.sort(key=lambda pair: pair[1], reverse=True)

    data = []
    for position, _score in candidates[:top_n]:
        # books can hold several rows with the same title; keep the first one.
        match = books[books['Book-Title'] == pt.index[position]].drop_duplicates('Book-Title')
        data.append(
            match['Book-Title'].tolist()
            + match['Book-Author'].tolist()
            + match['Image-URL-M'].tolist()
        )

    return data

In [None]:
recommend('1984')

In [None]:
books.columns

#### Export data and model to pkl

In [None]:
import pickle as pkl

## write through a context manager so the file is flushed and closed after the dump
with open('popular.pkl', 'wb') as popular_file:
    pkl.dump(popular_df, popular_file)

In [None]:
## each file is opened in a context manager so it is closed right after writing
with open('books.pkl', 'wb') as books_file:  ## books data
    pkl.dump(books, books_file)
with open('pt.pkl', 'wb') as pt_file:  ## books and user feedback
    pkl.dump(pt, pt_file)
with open('similiarity_scores.pkl', 'wb') as scores_file:  ## book-to-book similarity matrix
    pkl.dump(similiarity_scores, scores_file)