### Import Libraries

In [310]:
import pandas as pd
import numpy as np

### Loading Dataset

In [311]:
# Read the three raw CSV exports in a fixed order: books, users, ratings.
# low_memory=False is passed to every read, exactly as before.
books, users, ratings = (
    pd.read_csv(csv_path, low_memory=False)
    for csv_path in ('Books.csv', 'Users.csv', 'Ratings.csv')
)

In [312]:
books.shape

(271360, 8)

In [313]:
users.shape

(278858, 3)

In [314]:
ratings.shape

(1149780, 3)

### Renaming columns

In [315]:
import pandas as pd

def rename_columns(dataframe, column_mapping):
    """
    Return a copy of ``dataframe`` with its columns renamed.

    Parameters:
    dataframe (pd.DataFrame): Frame whose column labels should change.
    column_mapping (dict): Maps each current column name to its replacement.

    Returns:
    pd.DataFrame: New frame carrying the renamed columns.

    Raises:
    ValueError: If ``dataframe`` is not a DataFrame or ``column_mapping`` is
        not a dict.
    """
    # Validate the arguments in declaration order so the first bad one wins.
    argument_checks = (
        (dataframe, pd.DataFrame, "Input must be a pandas DataFrame."),
        (column_mapping, dict, "Column mapping must be a dictionary."),
    )
    for value, required_type, message in argument_checks:
        if not isinstance(value, required_type):
            raise ValueError(message)

    return dataframe.rename(columns=column_mapping)

In [316]:
books.columns

Index(['ISBN', 'Book-Title', 'Book-Author', 'Year-Of-Publication', 'Publisher',
       'Image-URL-S', 'Image-URL-M', 'Image-URL-L'],
      dtype='object')

In [317]:
# Raw hyphenated book headers -> UPPER_SNAKE_CASE names
column_mapping = {
    'ISBN': 'ISBN',
    'Book-Title': 'BOOK_TITLE',
    'Book-Author': 'BOOK_AUTHOR',
    'Year-Of-Publication': 'YEAR_OF_PUBLICATION',
    'Publisher': 'PUBLISHER',
    'Image-URL-S': 'IMAGE_URL_S',
    'Image-URL-M': 'IMAGE_URL_M',
    'Image-URL-L': 'IMAGE_URL_L',
}

In [318]:
books = rename_columns(books, column_mapping)

In [319]:
users.columns

Index(['User-ID', 'Location', 'Age'], dtype='object')

In [320]:
# Raw user headers -> UPPER_SNAKE_CASE names
column_mapping = {
    'User-ID': 'USER_ID',
    'Location': 'LOCATION',
    'Age': 'AGE',
}

In [321]:
users = rename_columns(users, column_mapping)

In [322]:
ratings.columns

Index(['User-ID', 'ISBN', 'Book-Rating'], dtype='object')

In [323]:
# Raw rating headers -> UPPER_SNAKE_CASE names
column_mapping = {
    'User-ID': 'USER_ID',
    'ISBN': 'ISBN',
    'Book-Rating': 'BOOK_RATING',
}

In [324]:
ratings = rename_columns(ratings, column_mapping)

### Checking missing values and duplicate records

In [325]:
# Per-column count of missing values in the books table
books.isnull().sum().to_frame(name="Total No. of Missing Values")

Unnamed: 0,Total No. of Missing Values
ISBN,0
BOOK_TITLE,0
BOOK_AUTHOR,2
YEAR_OF_PUBLICATION,0
PUBLISHER,2
IMAGE_URL_S,0
IMAGE_URL_M,0
IMAGE_URL_L,3


In [326]:
# Per-column count of missing values in the users table
users.isnull().sum().to_frame(name="Total No. of Missing Values")

Unnamed: 0,Total No. of Missing Values
USER_ID,0
LOCATION,0
AGE,110762


In [327]:
# Per-column count of missing values in the ratings table
ratings.isnull().sum().to_frame(name="Total No. of Missing Values")

Unnamed: 0,Total No. of Missing Values
USER_ID,0
ISBN,0
BOOK_RATING,0


In [328]:
print("Duplicate Values =",books.duplicated().sum())

Duplicate Values = 0


In [329]:
print("Duplicate Values =",users.duplicated().sum())

Duplicate Values = 0


In [330]:
print("Duplicate Values =",ratings.duplicated().sum())

Duplicate Values = 0


In [331]:
books.sample(1)

Unnamed: 0,ISBN,BOOK_TITLE,BOOK_AUTHOR,YEAR_OF_PUBLICATION,PUBLISHER,IMAGE_URL_S,IMAGE_URL_M,IMAGE_URL_L
144130,394708407,Murder on the Thirty-First Floor (Pantheon int...,Per Wahloo,1982,Random House Inc,http://images.amazon.com/images/P/0394708407.0...,http://images.amazon.com/images/P/0394708407.0...,http://images.amazon.com/images/P/0394708407.0...


In [332]:
users.sample()

Unnamed: 0,USER_ID,LOCATION,AGE
154464,154465,"albany, california, usa",32.0


In [333]:
ratings.sample()

Unnamed: 0,USER_ID,ISBN,BOOK_RATING
952495,230522,446527785,0


In [334]:
# Attach book metadata to every rating. This is an inner join (the pandas
# default, spelled out here), so ratings whose ISBN is absent from `books`
# are dropped.
rating_with_name = pd.merge(ratings, books, how='inner', on='ISBN')
rating_with_name.sample()

Unnamed: 0,USER_ID,ISBN,BOOK_RATING,BOOK_TITLE,BOOK_AUTHOR,YEAR_OF_PUBLICATION,PUBLISHER,IMAGE_URL_S,IMAGE_URL_M,IMAGE_URL_L
879383,110746,1854794450,0,The Little Book of Farting,Alec Bromcie,1999,Michael O'Mara Books,http://images.amazon.com/images/P/1854794450.0...,http://images.amazon.com/images/P/1854794450.0...,http://images.amazon.com/images/P/1854794450.0...


### Popularity-based recommendation system

In [335]:
# Count how many ratings each BOOK_TITLE received
num_rating_dataframe = (
    rating_with_name
    .groupby('BOOK_TITLE')
    .size()
    .reset_index(name='NUM_OF_RATING')
    .sort_values(by='BOOK_TITLE', ascending=True)
)
num_rating_dataframe

Unnamed: 0,BOOK_TITLE,NUM_OF_RATING
0,A Light in the Storm: The Civil War Diary of ...,4
1,Always Have Popsicles,1
2,Apple Magic (The Collector's series),1
3,"Ask Lily (Young Women of Faith: Lily Series, ...",1
4,Beyond IBM: Leadership Marketing and Finance ...,1
...,...,...
241066,Ã?Â?lpiraten.,2
241067,Ã?Â?rger mit Produkt X. Roman.,4
241068,Ã?Â?sterlich leben.,1
241069,Ã?Â?stlich der Berge.,3


In [336]:
# Mean BOOK_RATING per BOOK_TITLE.
# NOTE(review): rows with BOOK_RATING == 0 are included in the mean. If 0
# encodes "no explicit rating" rather than a real score, it drags every
# average down -- confirm against the dataset description.
avg_rating_dataframe = (
    rating_with_name
    .groupby('BOOK_TITLE')['BOOK_RATING']
    .mean()
    .reset_index(name='AVG_RATING')
)
avg_rating_dataframe

Unnamed: 0,BOOK_TITLE,AVG_RATING
0,A Light in the Storm: The Civil War Diary of ...,2.250000
1,Always Have Popsicles,0.000000
2,Apple Magic (The Collector's series),0.000000
3,"Ask Lily (Young Women of Faith: Lily Series, ...",8.000000
4,Beyond IBM: Leadership Marketing and Finance ...,0.000000
...,...,...
241066,Ã?Â?lpiraten.,0.000000
241067,Ã?Â?rger mit Produkt X. Roman.,5.250000
241068,Ã?Â?sterlich leben.,7.000000
241069,Ã?Â?stlich der Berge.,2.666667


In [337]:
# One row per title carrying both its rating count and its mean rating
popular_dataframe = pd.merge(num_rating_dataframe, avg_rating_dataframe, on='BOOK_TITLE')
popular_dataframe

Unnamed: 0,BOOK_TITLE,NUM_OF_RATING,AVG_RATING
0,A Light in the Storm: The Civil War Diary of ...,4,2.250000
1,Always Have Popsicles,1,0.000000
2,Apple Magic (The Collector's series),1,0.000000
3,"Ask Lily (Young Women of Faith: Lily Series, ...",1,8.000000
4,Beyond IBM: Leadership Marketing and Finance ...,1,0.000000
...,...,...,...
241066,Ã?Â?lpiraten.,2,0.000000
241067,Ã?Â?rger mit Produkt X. Roman.,4,5.250000
241068,Ã?Â?sterlich leben.,1,7.000000
241069,Ã?Â?stlich der Berge.,3,2.666667


In [338]:
# Keep titles with more than 250 ratings, then take the 50 with the highest
# mean rating.
popular_dataframe = (
    popular_dataframe
    .loc[lambda frame: frame['NUM_OF_RATING'] > 250]
    .sort_values('AVG_RATING', ascending=False)
    .head(50)
)
popular_dataframe

Unnamed: 0,BOOK_TITLE,NUM_OF_RATING,AVG_RATING
80434,Harry Potter and the Prisoner of Azkaban (Book 3),428,5.852804
80422,Harry Potter and the Goblet of Fire (Book 4),387,5.824289
80441,Harry Potter and the Sorcerer's Stone (Book 1),278,5.73741
80426,Harry Potter and the Order of the Phoenix (Boo...,347,5.501441
80414,Harry Potter and the Chamber of Secrets (Book 2),556,5.183453
191612,The Hobbit : The Enchanting Prelude to The Lor...,281,5.007117
187377,The Fellowship of the Ring (The Lord of the Ri...,368,4.94837
80445,Harry Potter and the Sorcerer's Stone (Harry P...,575,4.895652
211384,"The Two Towers (The Lord of the Rings, Part 2)",260,4.880769
219741,To Kill a Mockingbird,510,4.7


In [339]:
# Pull author / publisher / cover URL from `books`. A title can match several
# rows in `books`, so only the first match per title is kept before the
# display columns are selected.
popular_dataframe = (
    popular_dataframe
    .merge(books, on='BOOK_TITLE')
    .drop_duplicates('BOOK_TITLE')
    .loc[:, ['BOOK_TITLE', 'AVG_RATING', 'BOOK_AUTHOR', 'PUBLISHER', 'IMAGE_URL_M']]
)
popular_dataframe

Unnamed: 0,BOOK_TITLE,AVG_RATING,BOOK_AUTHOR,PUBLISHER,IMAGE_URL_M
0,Harry Potter and the Prisoner of Azkaban (Book 3),5.852804,J. K. Rowling,Scholastic,http://images.amazon.com/images/P/0439136350.0...
3,Harry Potter and the Goblet of Fire (Book 4),5.824289,J. K. Rowling,Scholastic,http://images.amazon.com/images/P/0439139597.0...
5,Harry Potter and the Sorcerer's Stone (Book 1),5.73741,J. K. Rowling,Scholastic,http://images.amazon.com/images/P/0590353403.0...
9,Harry Potter and the Order of the Phoenix (Boo...,5.501441,J. K. Rowling,Scholastic,http://images.amazon.com/images/P/043935806X.0...
13,Harry Potter and the Chamber of Secrets (Book 2),5.183453,J. K. Rowling,Scholastic,http://images.amazon.com/images/P/0439064872.0...
16,The Hobbit : The Enchanting Prelude to The Lor...,5.007117,J.R.R. TOLKIEN,Del Rey,http://images.amazon.com/images/P/0345339681.0...
17,The Fellowship of the Ring (The Lord of the Ri...,4.94837,J.R.R. TOLKIEN,Del Rey,http://images.amazon.com/images/P/0345339703.0...
26,Harry Potter and the Sorcerer's Stone (Harry P...,4.895652,J. K. Rowling,Arthur A. Levine Books,http://images.amazon.com/images/P/059035342X.0...
28,"The Two Towers (The Lord of the Rings, Part 2)",4.880769,J.R.R. TOLKIEN,Del Rey,http://images.amazon.com/images/P/0345339711.0...
39,To Kill a Mockingbird,4.7,Harper Lee,Little Brown &amp; Company,http://images.amazon.com/images/P/0446310786.0...


### Collaborative filtering algorithm

**Find all users who rated more than 200 books**

In [340]:
# Boolean Series indexed by USER_ID: True where the user has more than 200
# ratings in the merged table.
rated_atleast_200_books = rating_with_name.groupby('USER_ID')['BOOK_RATING'].count() > 200

**Get the index (user IDs) of the users who rated more than 200 books**

In [341]:
# USER_IDs whose flag is True in the mask above
book_worm = rated_atleast_200_books.loc[rated_atleast_200_books].index

**Create the dataframe of ratings made by the users who rated more than 200 books**

In [342]:
# Restrict the merged ratings to rows written by the prolific users
filtered_rating = rating_with_name.loc[
    lambda frame: frame['USER_ID'].isin(book_worm)
]
filtered_rating

Unnamed: 0,USER_ID,ISBN,BOOK_RATING,BOOK_TITLE,BOOK_AUTHOR,YEAR_OF_PUBLICATION,PUBLISHER,IMAGE_URL_S,IMAGE_URL_M,IMAGE_URL_L
2,6543,034545104X,0,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...
5,23768,034545104X,0,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...
7,28523,034545104X,0,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...
15,77940,034545104X,0,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...
16,81977,034545104X,0,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...
...,...,...,...,...,...,...,...,...,...,...
1030883,275970,1880837927,0,The Theology of the Hammer,Millard Fuller,1994,Smyth &amp; Helwys Publishing,http://images.amazon.com/images/P/1880837927.0...,http://images.amazon.com/images/P/1880837927.0...,http://images.amazon.com/images/P/1880837927.0...
1030884,275970,188717897X,0,The Ordeal of Integration: Progress and Resent...,Orlando Patterson,1998,Civitas Book Publisher,http://images.amazon.com/images/P/188717897X.0...,http://images.amazon.com/images/P/188717897X.0...,http://images.amazon.com/images/P/188717897X.0...
1030885,275970,1888889047,0,Pushcart's Complete Rotten Reviews &amp; Rejec...,Bill Henderson,1998,Pushcart Press,http://images.amazon.com/images/P/1888889047.0...,http://images.amazon.com/images/P/1888889047.0...,http://images.amazon.com/images/P/1888889047.0...
1030886,275970,1931868123,0,There's a Porcupine in My Outhouse: Misadventu...,Mike Tougias,2002,Capital Books (VA),http://images.amazon.com/images/P/1931868123.0...,http://images.amazon.com/images/P/1931868123.0...,http://images.amazon.com/images/P/1931868123.0...


**Find all books that have at least 50 ratings**

In [343]:
# Boolean Series indexed by BOOK_TITLE: True where the title has at least 50
# ratings among the filtered ratings; then keep just those titles.
book_rated_50 = filtered_rating.groupby('BOOK_TITLE')['BOOK_RATING'].count() >= 50
famous_books = book_rated_50.loc[book_rated_50].index

In [344]:
# Ratings by prolific users, limited to the frequently rated titles
final_rating = filtered_rating.loc[
    lambda frame: frame['BOOK_TITLE'].isin(famous_books)
]

**Pivot the table**

In [345]:
# Title x user matrix of BOOK_RATING. pivot_table aggregates with its default
# function when a (title, user) pair occurs more than once; pairs with no
# rating come out as NaN.
pt = pd.pivot_table(
    final_rating.drop_duplicates(),
    index='BOOK_TITLE',
    columns='USER_ID',
    values='BOOK_RATING',
)

In [346]:
# Treat "no rating" as 0 so every row is a complete numeric vector.
# Rebinding instead of inplace=True keeps the cell free of hidden mutation.
pt = pt.fillna(0)

In [347]:
pt

USER_ID,254,2276,2766,2977,3363,4017,4385,6251,6323,6543,...,271705,273979,274004,274061,274301,274308,275970,277427,277639,278418
BOOK_TITLE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1984,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1st to Die: A Novel,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2nd Chance,0.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4 Blondes,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A Bend in the Road,0.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Year of Wonders,0.0,0.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
You Belong To Me,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Zen and the Art of Motorcycle Maintenance: An Inquiry into Values,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Zoya,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [348]:
from sklearn.metrics.pairwise import cosine_similarity

In [349]:
# Pairwise cosine similarity between the rows of `pt` (one row per book
# title), giving a square books-by-books matrix.
similarity_score = cosine_similarity(pt)

In [350]:
similarity_score.shape

(706, 706)

### Book recommendation system

In [351]:
def book_recommend(book_name, top_n=10):
    """
    Print the titles most similar to ``book_name``.

    Looks ``book_name`` up in the index of the global pivot table ``pt``,
    reads the matching row of the global ``similarity_score`` matrix and
    prints the ``top_n`` other titles with the highest scores, best first.

    Parameters:
    book_name (str): Title exactly as it appears in ``pt.index``.
    top_n (int): Number of recommendations to print (default 10).

    Returns:
    None: The titles are printed, one per line.

    Raises:
    IndexError: If ``book_name`` is not present in ``pt.index``.
    """
    matches = np.where(pt.index == book_name)[0]
    if matches.size == 0:
        # The bare ``[0][0]`` lookup used to fail with IndexError here; keep
        # that exception type but explain what actually went wrong.
        raise IndexError(f"{book_name!r} is not a title in the similarity table")
    index = matches[0]

    scores = np.asarray(similarity_score[index])
    # Stable descending sort: equal scores keep their original row order.
    ranked = np.argsort(-scores, kind='stable')
    # Remove the queried book by position rather than assuming it sorts first.
    # A different title with an equal score could otherwise take slot 0 and
    # push the queried book into its own recommendations.
    similar_item = [i for i in ranked if i != index][:top_n]

    for i in similar_item:
        print(pt.index[i])

In [352]:
book_recommend('The Lovely Bones: A Novel')

Where the Heart Is (Oprah's Book Club (Paperback))
Good in Bed
The Book of Ruth (Oprah's Book Club (Paperback))
Life of Pi
Lucky : A Memoir
The Pilot's Wife : A Novel
Drowning Ruth (Oprah's Book Club)
The Nanny Diaries: A Novel
Nights in Rodanthe
Empire Falls


In [353]:
book_recommend('Harry Potter and the Chamber of Secrets (Book 2)')

Harry Potter and the Prisoner of Azkaban (Book 3)
Harry Potter and the Goblet of Fire (Book 4)
Harry Potter and the Sorcerer's Stone (Harry Potter (Paperback))
Harry Potter and the Sorcerer's Stone (Book 1)
Harry Potter and the Order of the Phoenix (Book 5)
Charlotte's Web (Trophy Newbery)
The Fellowship of the Ring (The Lord of the Rings, Part 1)
The Witness
The Firm
The Secret Garden


In [354]:
book_recommend('The Witness')

Secrets
Exclusive
Message from Nam
Kaleidoscope
Remember Me
The Long Road Home
Anne of Avonlea (Anne of Green Gables Novels (Paperback))
Montana Sky
Unspeakable
Fine Things


### Dump the required files

In [355]:
import pickle

In [356]:
# Persist the popularity table. The context manager closes the file handle
# as soon as the dump finishes instead of leaving it open.
with open('popular_50.pkl', 'wb') as pkl_file:
    pickle.dump(popular_dataframe, pkl_file)

In [357]:
# Persist the title x user pivot table. The context manager closes the file
# handle as soon as the dump finishes instead of leaving it open.
with open('pt.pkl', 'wb') as pkl_file:
    pickle.dump(pt, pkl_file)

In [358]:
# Persist the similarity matrix. The context manager closes the file handle
# as soon as the dump finishes instead of leaving it open.
with open('similarity_score.pkl', 'wb') as pkl_file:
    pickle.dump(similarity_score, pkl_file)

In [359]:
# Persist the renamed books table. The context manager closes the file handle
# as soon as the dump finishes instead of leaving it open.
with open('books.pkl', 'wb') as pkl_file:
    pickle.dump(books, pkl_file)

### Recommendation function for Streamlit

In [360]:
def book_recommend_web(book_name, top_n=4):
    """
    Return the titles most similar to ``book_name`` as a DataFrame.

    Looks ``book_name`` up in the index of the global pivot table ``pt``,
    ranks the other titles by the matching row of the global
    ``similarity_score`` matrix, and pulls the author and cover URL of each
    recommended title from the global ``books`` table.

    Parameters:
    book_name (str): Title exactly as it appears in ``pt.index``.
    top_n (int): Number of recommendations to return (default 4).

    Returns:
    pd.DataFrame: Columns ``BOOK_TITLE``, ``BOOK_AUTHOR``, ``IMAGE_URL_M``;
        one row per recommendation, best match first.

    Raises:
    IndexError: If ``book_name`` is not present in ``pt.index``.
    """
    matches = np.where(pt.index == book_name)[0]
    if matches.size == 0:
        # The bare ``[0][0]`` lookup used to fail with IndexError here; keep
        # that exception type but explain what actually went wrong.
        raise IndexError(f"{book_name!r} is not a title in the similarity table")
    index = matches[0]

    scores = np.asarray(similarity_score[index])
    # Stable descending sort: equal scores keep their original row order.
    ranked = np.argsort(-scores, kind='stable')
    # Remove the queried book by position rather than assuming it sorts first.
    similar_items = [i for i in ranked if i != index][:top_n]

    columns = ["BOOK_TITLE", "BOOK_AUTHOR", "IMAGE_URL_M"]
    data = []
    for i in similar_items:
        title = pt.index[i]
        # Several rows in `books` can share a title; use the first one, which
        # is the row drop_duplicates('BOOK_TITLE') would have kept.
        first_match = books.loc[books['BOOK_TITLE'] == title, columns].head(1)
        if first_match.empty:
            # Keep the recommendation even without metadata so every row has
            # three fields and the frame can still be built.
            data.append([title, None, None])
        else:
            data.append(first_match.iloc[0].tolist())

    return pd.DataFrame(data, columns=columns)

In [361]:
book_recommend_web('The Notebook')

Unnamed: 0,BOOK_TITLE,BOOK_AUTHOR,IMAGE_URL_M
0,A Walk to Remember,Nicholas Sparks,http://images.amazon.com/images/P/0446608955.0...
1,The Rescue,Nicholas Sparks,http://images.amazon.com/images/P/0446610399.0...
2,One Door Away from Heaven,Dean R. Koontz,http://images.amazon.com/images/P/0553582755.0...
3,Toxin,Robin Cook,http://images.amazon.com/images/P/0425166619.0...
