In [1]:
import warnings
warnings.simplefilter('ignore')

import numpy as np
import pandas as pd

In [2]:
# All three BX CSV files share the same dialect: ';' separated, latin-1
# encoded, and rows that cannot be parsed are skipped rather than raising.
csv_options = {'sep': ";", 'on_bad_lines': 'skip', 'encoding': 'latin-1'}

books = pd.read_csv('Data/BX-Books.csv', **csv_options)
users = pd.read_csv('Data/BX-Users.csv', **csv_options)
ratings = pd.read_csv('Data/BX-Book-Ratings.csv', **csv_options)

In [3]:
# Report (rows, columns) for each loaded frame.
for label, frame in (("Books", books), ("Users", users), ("Ratings", ratings)):
    print(f"{label} Dataset: ", frame.shape)

Books Dataset:  (271360, 8)
Users Dataset:  (278858, 3)
Ratings Dataset:  (1149780, 3)


In [4]:
print("Books Dataset: ",books.columns)


Books Dataset:  Index(['ISBN', 'Book-Title', 'Book-Author', 'Year-Of-Publication', 'Publisher',
       'Image-URL-S', 'Image-URL-M', 'Image-URL-L'],
      dtype='object')


In [5]:
print("\nUsers Dataset: ",users.columns)



Users Dataset:  Index(['User-ID', 'Location', 'Age'], dtype='object')


In [6]:
print("\nRatings Dataset: ",ratings.columns)


Ratings Dataset:  Index(['User-ID', 'ISBN', 'Book-Rating'], dtype='object')


In [7]:
# Shorten the verbose column headers. rename() returns a new frame, so the
# names are rebound instead of mutating the frames in place.
books = books.rename(columns={
    'Book-Title': 'Title',
    'Book-Author': 'Author',
    'Year-Of-Publication': 'Year',
    'Image-URL-L': 'URL',
})

ratings = ratings.rename(columns={'Book-Rating': 'Ratings'})

In [8]:
books.head(5)

Unnamed: 0,ISBN,Title,Author,Year,Publisher,Image-URL-S,Image-URL-M,URL
0,195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...
1,2005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...
2,60973129,Decision in Normandy,Carlo D'Este,1991,HarperPerennial,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...
3,374157065,Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata,1999,Farrar Straus Giroux,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...
4,393045218,The Mummies of Urumchi,E. J. W. Barber,1999,W. W. Norton &amp; Company,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...


In [9]:
ratings.head(5)

Unnamed: 0,User-ID,ISBN,Ratings
0,276725,034545104X,0
1,276726,0155061224,5
2,276727,0446520802,0
3,276729,052165615X,3
4,276729,0521795028,6


In [10]:
users.head(5)

Unnamed: 0,User-ID,Location,Age
0,1,"nyc, new york, usa",
1,2,"stockton, california, usa",18.0
2,3,"moscow, yukon territory, russia",
3,4,"porto, v.n.gaia, portugal",17.0
4,5,"farnborough, hants, united kingdom",


In [11]:
# Keep only the columns used downstream (drops the small/medium image URLs).
# The explicit .copy() makes `books` an independent frame, so adding columns
# to it later does not go through pandas' SettingWithCopy path -- a warning
# that would otherwise be hidden by the global warnings filter in this notebook.
books = books[['ISBN','Title', 'Author', 'Year', 'Publisher','URL']].copy()

In [12]:
ratings.head()

Unnamed: 0,User-ID,ISBN,Ratings
0,276725,034545104X,0
1,276726,0155061224,5
2,276727,0446520802,0
3,276729,052165615X,3
4,276729,0521795028,6


In [13]:
ratings['User-ID'].value_counts()

User-ID
11676     13602
198711     7550
153662     6109
98391      5891
35859      5850
          ...  
116180        1
116166        1
116154        1
116137        1
276723        1
Name: count, Length: 105283, dtype: int64

In [14]:
# Boolean Series indexed by User-ID: True where the user has more than 200
# rating rows. The last line shows how many users pass the threshold.
x = ratings['User-ID'].value_counts().gt(200)
x[x].shape

(899,)

In [15]:
# User-IDs for which the mask `x` is True.
y = x.index[x.to_numpy()]

In [16]:
# Keep only the rating rows written by users listed in `y`.
ratings = ratings.loc[ratings['User-ID'].isin(y)]

In [17]:
ratings.shape

(526356, 3)

In [18]:
# Inner join on ISBN: ratings whose ISBN has no row in `books` are dropped.
ratings_with_books = pd.merge(ratings, books, on='ISBN')

In [19]:
ratings_with_books.head()

Unnamed: 0,User-ID,ISBN,Ratings,Title,Author,Year,Publisher,URL
0,277427,002542730X,10,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner,1994,John Wiley &amp; Sons Inc,http://images.amazon.com/images/P/002542730X.0...
1,3363,002542730X,0,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner,1994,John Wiley &amp; Sons Inc,http://images.amazon.com/images/P/002542730X.0...
2,11676,002542730X,6,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner,1994,John Wiley &amp; Sons Inc,http://images.amazon.com/images/P/002542730X.0...
3,12538,002542730X,10,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner,1994,John Wiley &amp; Sons Inc,http://images.amazon.com/images/P/002542730X.0...
4,13552,002542730X,0,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner,1994,John Wiley &amp; Sons Inc,http://images.amazon.com/images/P/002542730X.0...


In [20]:
ratings_with_books.shape

(487671, 8)

In [21]:
# Number of (non-null) rating rows per title, as a two-column frame.
number_rating = (
    ratings_with_books
    .groupby('Title')['Ratings']
    .agg('count')
    .reset_index()
)

In [22]:
number_rating.head()

Unnamed: 0,Title,Ratings
0,A Light in the Storm: The Civil War Diary of ...,2
1,Always Have Popsicles,1
2,Apple Magic (The Collector's series),1
3,Beyond IBM: Leadership Marketing and Finance ...,1
4,Clifford Visita El Hospital (Clifford El Gran...,1


In [23]:
# The 'Ratings' column now holds a count, so give it a name that says so.
number_rating = number_rating.rename(columns={'Ratings': 'No. of Ratings'})

In [24]:
number_rating.head()

Unnamed: 0,Title,No. of Ratings
0,A Light in the Storm: The Civil War Diary of ...,2
1,Always Have Popsicles,1
2,Apple Magic (The Collector's series),1
3,Beyond IBM: Leadership Marketing and Finance ...,1
4,Clifford Visita El Hospital (Clifford El Gran...,1


In [25]:
# Attach each title's rating count to every one of its rating rows.
final_rating = pd.merge(ratings_with_books, number_rating, on='Title')

In [26]:
ratings_with_books

Unnamed: 0,User-ID,ISBN,Ratings,Title,Author,Year,Publisher,URL
0,277427,002542730X,10,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner,1994,John Wiley &amp; Sons Inc,http://images.amazon.com/images/P/002542730X.0...
1,3363,002542730X,0,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner,1994,John Wiley &amp; Sons Inc,http://images.amazon.com/images/P/002542730X.0...
2,11676,002542730X,6,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner,1994,John Wiley &amp; Sons Inc,http://images.amazon.com/images/P/002542730X.0...
3,12538,002542730X,10,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner,1994,John Wiley &amp; Sons Inc,http://images.amazon.com/images/P/002542730X.0...
4,13552,002542730X,0,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner,1994,John Wiley &amp; Sons Inc,http://images.amazon.com/images/P/002542730X.0...
...,...,...,...,...,...,...,...,...
487666,275970,1892145022,0,Here Is New York,E. B. White,1999,Little Bookroom,http://images.amazon.com/images/P/1892145022.0...
487667,275970,1931868123,0,There's a Porcupine in My Outhouse: Misadventu...,Mike Tougias,2002,Capital Books (VA),http://images.amazon.com/images/P/1931868123.0...
487668,275970,3411086211,10,Die Biene.,Sybil GrÃ?Â¤fin SchÃ?Â¶nfeldt,1993,"Bibliographisches Institut, Mannheim",http://images.amazon.com/images/P/3411086211.0...
487669,275970,3829021860,0,The Penis Book,Joseph Cohen,1999,Konemann,http://images.amazon.com/images/P/3829021860.0...


In [27]:
final_rating.shape

(487671, 9)

In [28]:
# Keep only titles that have at least 50 rating rows.
final_rating = final_rating.loc[final_rating['No. of Ratings'].ge(50)]

In [29]:
# Keep one row per (user, title) pair (the first occurrence wins).
# The result is rebound instead of using inplace=True: `final_rating` was
# itself produced by boolean filtering, and mutating such a derived frame in
# place is the pattern pandas flags with SettingWithCopyWarning.
final_rating = final_rating.drop_duplicates(subset=['User-ID', 'Title'])

In [30]:
final_rating.shape

(59850, 9)

In [31]:
# Title x User-ID matrix of ratings; pairs without a rating row become NaN.
book_pivot = pd.pivot_table(
    final_rating,
    index='Title',
    columns='User-ID',
    values='Ratings',
)

In [32]:
book_pivot

User-ID,254,2276,2766,2977,3363,3757,4017,4385,6242,6251,...,274004,274061,274301,274308,274808,275970,277427,277478,277639,278418
Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1984,9.0,,,,,,,,,,...,,,,,,0.0,,,,
1st to Die: A Novel,,,,,,,,,,,...,,,,,,,,,,
2nd Chance,,10.0,,,,,,,,,...,,,,0.0,,,,,0.0,
4 Blondes,,,,,,,,,,0.0,...,,,,,,,,,,
84 Charing Cross Road,,,,,,,,,,,...,,,,,,10.0,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Year of Wonders,,,,7.0,,,,,7.0,,...,,,,,,0.0,,,,
You Belong To Me,,,,,,,,,,,...,,,,,,,,,,
Zen and the Art of Motorcycle Maintenance: An Inquiry into Values,,,,,0.0,,,,,0.0,...,,,,,,0.0,,,,
Zoya,,,,,,,,,,,...,,,,,,,,,,


In [33]:
book_pivot.shape

(742, 888)

In [34]:
# Replace missing (title, user) cells with 0 so the matrix is fully numeric.
book_pivot = book_pivot.fillna(0)

In [35]:
book_pivot

User-ID,254,2276,2766,2977,3363,3757,4017,4385,6242,6251,...,274004,274061,274301,274308,274808,275970,277427,277478,277639,278418
Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1984,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1st to Die: A Novel,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2nd Chance,0.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4 Blondes,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
84 Charing Cross Road,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,10.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Year of Wonders,0.0,0.0,0.0,7.0,0.0,0.0,0.0,0.0,7.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
You Belong To Me,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Zen and the Art of Motorcycle Maintenance: An Inquiry into Values,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Zoya,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [37]:
# SVD

In [46]:
from surprise import SVD, Dataset, Reader
from surprise.model_selection import train_test_split

# Convert the pivot table back to long format for Surprise:
# one row per (Title, User-ID) cell with its value in 'Rating'.
ratings_df = book_pivot.reset_index().melt(id_vars=['Title'], var_name='User-ID', value_name='Rating')
# Drop zero cells. They are either gaps filled by fillna(0) or ratings stored
# as 0, and both fall outside the 1-10 scale declared for the Reader below.
ratings_df = ratings_df[ratings_df['Rating'] > 0]

# Create a Surprise dataset (column order must be user, item, rating)
reader = Reader(rating_scale=(1, 10))
data = Dataset.load_from_df(ratings_df[['User-ID', 'Title', 'Rating']], reader)

# Split the data into training and testing sets (seeded, so the split is stable)
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

# Create and train the SVD model.
# random_state seeds the model's RNG; without it every fit starts from a
# different random initialisation and the recommendations printed by later
# cells change from run to run even though the split above is seeded.
svd_model = SVD(n_factors=100, n_epochs=20, lr_all=0.005, reg_all=0.02, random_state=42)
svd_model.fit(trainset)

def get_top_n_recommendations(model, user_id, book_pivot, n=5):
    """Return the ``n`` highest-predicted books the user has not rated yet.

    Parameters
    ----------
    model : object
        Anything exposing ``predict(user_id, item_id)`` whose result has
        ``iid`` and ``est`` attributes (e.g. a fitted Surprise algorithm).
    user_id : hashable
        Column label of the user in ``book_pivot``.
    book_pivot : pandas.DataFrame
        Title x user matrix; a cell ``> 0`` means the user rated that title.
    n : int, default 5
        Maximum number of recommendations to return.

    Returns
    -------
    list[tuple]
        ``(title, estimated_rating)`` pairs, best first. Ties keep the row
        order of ``book_pivot`` so the result is deterministic.

    Raises
    ------
    KeyError
        If ``user_id`` is not a column of ``book_pivot``.
    """
    if user_id not in book_pivot.columns:
        raise KeyError(f"User {user_id!r} is not a column of book_pivot")

    # Candidates are taken in index order (not via a set difference) so that
    # equally scored books always come out in the same order.
    already_rated = (book_pivot[user_id] > 0).to_numpy()
    candidates = book_pivot.index[~already_rated]

    # Make predictions
    predictions = [model.predict(user_id, title) for title in candidates]

    # sorted() is stable, also with reverse=True, so ties keep candidate order.
    top_n = sorted(predictions, key=lambda pred: pred.est, reverse=True)[:n]

    return [(pred.iid, pred.est) for pred in top_n]

# Demo: recommend for the user in the second column of the pivot table.
user_id = book_pivot.columns[1]
recommendations = get_top_n_recommendations(svd_model, user_id, book_pivot)
print(f"Top 5 recommendations for user {user_id}:")
for recommendation in recommendations:
    title, estimated_rating = recommendation
    print(f"- {title} (Estimated rating: {estimated_rating:.2f})")

Top 5 recommendations for user 2276:
- Harry Potter and the Sorcerer's Stone (Book 1) (Estimated rating: 9.96)
- Harry Potter and the Prisoner of Azkaban (Book 3) (Estimated rating: 9.94)
- The Two Towers (The Lord of the Rings, Part 2) (Estimated rating: 9.93)
- Seabiscuit: An American Legend (Estimated rating: 9.81)
- The Return of the King (The Lord of the Rings, Part 3) (Estimated rating: 9.80)


In [42]:
!pip install scikit-surprise scikit-learn


Collecting scikit-surprise
  Downloading scikit_surprise-1.1.4.tar.gz (154 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m154.4/154.4 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25h  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (pyproject.toml) ... [?25ldone
[?25h  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.4-cp310-cp310-macosx_11_0_arm64.whl size=493853 sha256=f690af2d37a080a662d048c24ac331c06b21f1340bf25ee333cc45f6bd22121c
  Stored in directory: /Users/robinsingh/Library/Caches/pip/wheels/4b/3f/df/6acbf0a40397d9bf3ff97f582cc22fb9ce66adde75bc71fd54
Successfully built scikit-surprise
Installing collected packages: scikit-surprise
Successfully installed scikit-surprise-1.1.4


In [41]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Create the 'content' column: one text document per book.
# Every field is cast to str with missing values replaced by '' *before*
# concatenation. With plain `+`, a single NaN field turns the whole row into
# NaN, and a later fillna('') would then erase all of that book's text.
content = books['Title'].fillna('').astype(str)
for field in ('Author', 'Publisher', 'Year'):
    content = content + ' ' + books[field].fillna('').astype(str)
books = books.assign(content=content)

# Create TF-IDF matrix (sparse, one row per book)
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(books['content'])

# NOTE: the full pairwise similarity matrix is deliberately NOT materialised.
# cosine_similarity returns a dense array by default; for the 271,360 books
# loaded above that is 271,360**2 float64 values (about 589 GB). Similarities
# are instead computed for one query row at a time inside the function.


# Function to get recommendations based on content similarity
def get_content_based_recommendations(title, cosine_sim=None, books=books,
                                      tfidf_matrix=tfidf_matrix, n=10):
    """Return the titles of the ``n`` books most similar to ``title``.

    Parameters
    ----------
    title : str
        Exact title to look up in ``books['Title']``; the first match is used.
    cosine_sim : array-like, optional
        Precomputed square similarity matrix, row-aligned with ``books``.
        When omitted, similarities for the query row are computed on demand
        from ``tfidf_matrix``.
    books : pandas.DataFrame
        Frame with a 'Title' column. Must be row-aligned with ``cosine_sim``
        or ``tfidf_matrix``, whichever is used.
    tfidf_matrix : sparse matrix
        TF-IDF features, one row per row of ``books``.
    n : int, default 10
        Number of titles to return.

    Returns
    -------
    pandas.Series
        Titles of the most similar books, most similar first, never
        including the query row itself.

    Raises
    ------
    IndexError
        If ``title`` does not occur in ``books['Title']``.
    """
    # Row *position* of the first match. Positions, not index labels, are what
    # the similarity rows and .iloc below expect.
    matches = np.flatnonzero((books['Title'] == title).to_numpy())
    if matches.size == 0:
        raise IndexError(f"Title not found in books: {title!r}")
    idx = int(matches[0])

    if cosine_sim is not None:
        # Copy so the caller's matrix is never modified.
        scores = np.array(cosine_sim[idx], dtype=float).ravel()
    else:
        # One query row against all rows: a 1 x n_books result.
        scores = cosine_similarity(tfidf_matrix[idx], tfidf_matrix).ravel()

    # Stable descending order; drop the query row explicitly rather than
    # assuming it sorts first (a book with identical text would tie with it).
    ranked = np.argsort(-scores, kind='stable')
    ranked = ranked[ranked != idx][:n]

    return books['Title'].iloc[ranked]

# Demo: content-based neighbours of a single title.
book_title = "Harry Potter and the Chamber of Secrets (Book 2)"
recommendations = get_content_based_recommendations(book_title)
print(f"\nTop 10 content-based recommendations for '{book_title}':")
for similar_title in recommendations:
    print(f"- {similar_title}")

: 

In [36]:
from scipy.sparse import csr_matrix
# Row labels (titles) of the pivot table, kept for mapping positions to names.
book_names = book_pivot.index

# CSR copy of the title x user matrix.
book_sparse = csr_matrix(book_pivot.to_numpy())

In [37]:
from sklearn.neighbors import NearestNeighbors
model = NearestNeighbors(algorithm= 'brute')

In [38]:
model.fit(book_sparse)

In [39]:
# Query the fitted model with row 237 of the pivot table as a single
# (1, n_users) sample and ask for its 6 nearest rows.
distance, suggestion = model.kneighbors(book_pivot.iloc[[237]].to_numpy(), n_neighbors=6)

In [40]:
distance

array([[ 0.        , 68.78953409, 69.5413546 , 72.64296249, 76.83098333,
        77.28518616]])

In [41]:
suggestion

array([[237, 240, 238, 241, 184, 536]])

In [42]:
book_pivot.iloc[241,:]

User-ID
254       9.0
2276      0.0
2766      0.0
2977      0.0
3363      0.0
         ... 
275970    9.0
277427    0.0
277478    0.0
277639    0.0
278418    0.0
Name: Harry Potter and the Sorcerer's Stone (Book 1), Length: 888, dtype: float64

In [43]:
# `suggestion` holds one row of neighbour positions per query; map each row
# of positions to the corresponding titles.
for neighbour_positions in suggestion:
    print(book_pivot.index[neighbour_positions])

Index(['Harry Potter and the Chamber of Secrets (Book 2)',
       'Harry Potter and the Prisoner of Azkaban (Book 3)',
       'Harry Potter and the Goblet of Fire (Book 4)',
       'Harry Potter and the Sorcerer's Stone (Book 1)', 'Exclusive',
       'The Cradle Will Fall'],
      dtype='object', name='Title')


In [44]:
import pickle
# Persist the fitted model and the frames it depends on. Each file is opened
# in a `with` block so the handle is flushed and closed even if dump() raises;
# the bare open(...) form never closed the files.
# NOTE: only unpickle these files from a trusted location -- pickle.load can
# execute arbitrary code.
for artifact, filename in (
    (model, 'Model.pkl'),
    (book_names, 'Book_Names.pkl'),
    (final_rating, 'Final_Rating.pkl'),
    (book_pivot, 'Book_Pivot.pkl'),
):
    with open(filename, 'wb') as handle:
        pickle.dump(artifact, handle)

In [45]:
def recommend_book(book_name, n_suggestions=5):
    """Print the books whose rating vectors are nearest to ``book_name``.

    Looks ``book_name`` up in the index of the global ``book_pivot``, queries
    the global nearest-neighbour ``model`` with that row, and prints a header
    followed by up to ``n_suggestions`` neighbouring titles.

    Parameters
    ----------
    book_name : str
        Exact title as it appears in ``book_pivot.index``.
    n_suggestions : int, default 5
        Maximum number of suggested titles to print.

    Returns
    -------
    None
        The suggestions are printed, not returned.

    Raises
    ------
    IndexError
        If ``book_name`` is not a title in ``book_pivot``.
    """
    matches = np.where(book_pivot.index == book_name)[0]
    if matches.size == 0:
        raise IndexError(f"'{book_name}' is not a title in book_pivot")
    book_id = matches[0]

    # Ask for one extra neighbour: the query row normally comes back as its
    # own nearest neighbour and is filtered out below.
    n_neighbors = min(n_suggestions + 1, len(book_pivot))
    query = book_pivot.iloc[book_id, :].values.reshape(1, -1)
    _, suggestion = model.kneighbors(query, n_neighbors=n_neighbors)

    # Print the header unconditionally and first. Previously it was printed
    # only when the searched title was reached in the neighbour list, so it
    # could appear mid-list or not at all.
    print(f"You searched '{book_name}'\n")
    print("The suggestion books are: \n")

    neighbour_titles = [
        title for title in book_pivot.index[suggestion[0]] if title != book_name
    ]
    for title in neighbour_titles[:n_suggestions]:
        print(title)

In [51]:
book_name = "Harry Potter and the Chamber of Secrets (Book 2)"
recommend_book(book_name)

You searched 'Harry Potter and the Chamber of Secrets (Book 2)'

The suggestion books are: 

Harry Potter and the Prisoner of Azkaban (Book 3)
Harry Potter and the Goblet of Fire (Book 4)
Harry Potter and the Sorcerer's Stone (Book 1)
Exclusive
The Cradle Will Fall
