## Importing Libraries

In [35]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from surprise import Reader, Dataset, SVD
from surprise.model_selection import train_test_split
from surprise.model_selection import cross_validate
from sklearn.metrics.pairwise import linear_kernel

#for deep learning model
from keras.models import Sequential
from keras.layers import Dense, Dropout
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

## Data Pre-processing

In [37]:
pd.set_option('display.max_colwidth', None)

In [39]:
books = pd.read_csv('books.csv')
ratings = pd.read_csv('ratings.csv')

In [40]:
books.head(2)

Unnamed: 0,book_id,goodreads_book_id,best_book_id,work_id,books_count,isbn,isbn13,authors,original_publication_year,original_title,...,ratings_count,work_ratings_count,work_text_reviews_count,ratings_1,ratings_2,ratings_3,ratings_4,ratings_5,image_url,small_image_url
0,1,2767052,2767052,2792775,272,439023483,9780439000000.0,Suzanne Collins,2008.0,The Hunger Games,...,4780653,4942365,155254,66715,127936,560092,1481305,2706317,https://images.gr-assets.com/books/1447303603m/2767052.jpg,https://images.gr-assets.com/books/1447303603s/2767052.jpg
1,2,3,3,4640799,491,439554934,9780440000000.0,"J.K. Rowling, Mary GrandPré",1997.0,Harry Potter and the Philosopher's Stone,...,4602479,4800065,75867,75504,101676,455024,1156318,3011543,https://images.gr-assets.com/books/1474154022m/3.jpg,https://images.gr-assets.com/books/1474154022s/3.jpg


In [41]:
ratings.head(2)

Unnamed: 0,user_id,book_id,rating
0,1,258,5
1,2,4081,4


In [42]:
books.columns

Index(['book_id', 'goodreads_book_id', 'best_book_id', 'work_id',
       'books_count', 'isbn', 'isbn13', 'authors', 'original_publication_year',
       'original_title', 'title', 'language_code', 'average_rating',
       'ratings_count', 'work_ratings_count', 'work_text_reviews_count',
       'ratings_1', 'ratings_2', 'ratings_3', 'ratings_4', 'ratings_5',
       'image_url', 'small_image_url'],
      dtype='object')

## Collaborative Filtering using SVD

In [47]:
# Create a reader object for the Surprise library
reader = Reader(rating_scale=(1, 5))

# Load ratings into a Surprise dataset
data = Dataset.load_from_df(ratings[['user_id', 'book_id', 'rating']], reader)

### Book Recommendations based on a User

**Load the saved model**

In [49]:
import pickle
with open('svd_model.pkl', 'rb') as f:
    loaded_svd = pickle.load(f)

print("Model loaded from 'svd_model.pkl'")

Model loaded from 'svd_model.pkl'


In [50]:
# Function to recommend books for a user
def recommend_books_for_user(user_id, books, model, n_recommendations=5):
    all_book_ids = books['book_id'].unique()
    rated_books = ratings[ratings['user_id'] == user_id]['book_id'].tolist()

    book_predictions = []
    for book_id in all_book_ids:
        if book_id not in rated_books:
            est_rating = model.predict(user_id, book_id).est
            book_predictions.append((book_id, est_rating))

    book_predictions.sort(key=lambda x: x[1], reverse=True)
    top_recommendations = book_predictions[:n_recommendations]

    recommended_titles = []
    for book_id, _ in top_recommendations:
        title = books[books['book_id'] == book_id]['title'].values[0]
        recommended_titles.append(title)

    return recommended_titles

In [71]:
user_id = int(input("Enter user id: "))
n_recommendations = int(input("Enter the num of recommendation you want? "))
recommended_titles = recommend_books_for_user(user_id, books, loaded_svd, n_recommendations)

print(f"Top recommended Books for User {user_id}:")
for i, title in enumerate(recommended_titles, 1):
    print(f"{i}. {title}")

Enter user id:  8
Enter the num of recommendation you want?  5


Top recommended Books for User 8:
1. Lamb: The Gospel According to Biff, Christ's Childhood Pal
2. Saga, Vol. 3 (Saga, #3)
3. The Complete Calvin and Hobbes
4. Ficciones
5. ESV Study Bible


## Content Based Filtering using tf-idf & Cosine Similarity

In [55]:
# Combine title and authors for content-based filtering
books['combined_text'] = books['title'] + ' ' + books['authors']

# Feature extraction for content-based filtering
tfidf = TfidfVectorizer(stop_words='english')
books['combined_text'] = books['combined_text'].fillna('')  # Fill NaN with empty strings
tfidf_matrix = tfidf.fit_transform(books['combined_text'])

# Compute the cosine similarity matrix
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

### Recommendations based on Book Title

In [57]:
def get_recommendations(title, cosine_sim=cosine_sim):
    # Get the index of the book that matches the title
    if title not in books['title'].values:
        return "Title not found in the dataset."
        
    idx = books.index[books['title'] == title].tolist()[0]

    # Get the pairwise similarity scores of all books with that book
    sim_scores = list(enumerate(cosine_sim[idx]))

    # Sort the books based on the similarity scores
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    # Get the scores of the 5 most similar books
    sim_scores = sim_scores[1:6]

    # Get the book indices
    book_indices = [i[0] for i in sim_scores]

    # Return the top 5 most similar books
    return books.iloc[book_indices]

In [59]:
books.title[:10]

0                     The Hunger Games (The Hunger Games, #1)
1    Harry Potter and the Sorcerer's Stone (Harry Potter, #1)
2                                     Twilight (Twilight, #1)
3                                       To Kill a Mockingbird
4                                            The Great Gatsby
5                                      The Fault in Our Stars
6                                                  The Hobbit
7                                      The Catcher in the Rye
8                       Angels & Demons  (Robert Langdon, #1)
9                                         Pride and Prejudice
Name: title, dtype: object

In [73]:
usr_input = input("Enter book title: ")
recommended_books = get_recommendations(usr_input)
print("Top 5 recommendations are:")
recommended_books[['title', 'authors']]

Enter book title:  The Great Gatsby


Top 5 recommendations are:


Unnamed: 0,title,authors
2303,This Side of Paradise,F. Scott Fitzgerald
1183,Tender Is the Night,F. Scott Fitzgerald
3254,The Beautiful and Damned,F. Scott Fitzgerald
8869,The Great Brain (Great Brain #1),"John D. Fitzgerald, Mercer Mayer"
7408,The Short Stories,"F. Scott Fitzgerald, Matthew J. Bruccoli"


### Book Recommendations based on a User

In [65]:
def get_user_recommendations(user_id, n_recommendations, ratings, books, cosine_sim):
    
    # Get books the user has interacted with
    user_books = ratings[ratings['user_id'] == user_id]

    if user_books.empty:
        return "No interaction data found for this user."

    # Aggregate the user's profile (weighted by ratings)
    user_profile = np.zeros(cosine_sim.shape[0])
    for _, row in user_books.iterrows():
        book_idx = books.index[books['book_id'] == row['book_id']].tolist()[0]
        user_profile += row['rating'] * cosine_sim[book_idx]
    
    # Normalize the user profile
    user_profile = user_profile / np.linalg.norm(user_profile)

    # Compute similarity scores with all books
    sim_scores = list(enumerate(user_profile))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    # Filter out books the user has already interacted with
    interacted_indices = set(user_books['book_id'].map(
        lambda x: books.index[books['book_id'] == x].tolist()[0]))
    sim_scores = [score for score in sim_scores if score[0] not in interacted_indices]

    # Get the top n_recommendations recommended books
    top_indices = [score[0] for score in sim_scores[:n_recommendations]]

    # Return the recommended books
    return books.iloc[top_indices]


In [75]:
user_id = int(input("Enter user id: "))
n_recommendations = int(input("Enter the num of recommendation you want? "))
recommendations = get_user_recommendations(user_id, n_recommendations, ratings=ratings, books=books, cosine_sim=cosine_sim)

print(f"Top recommended Books for User {user_id}:")
for i, title in enumerate(recommended_titles, 1):
    print(f"{i}. {title}")

Enter user id:  8
Enter the num of recommendation you want?  5


Top recommended Books for User 8:
1. Lamb: The Gospel According to Biff, Christ's Childhood Pal
2. Saga, Vol. 3 (Saga, #3)
3. The Complete Calvin and Hobbes
4. Ficciones
5. ESV Study Bible


## Hybrid Approach

In [77]:
# Function to get content-based recommendations as scores
def get_content_based_scores(user_id, books, ratings, cosine_sim):
    user_books = ratings[ratings['user_id'] == user_id]

    if user_books.empty:
        return np.zeros(cosine_sim.shape[0]), "No interaction data found for this user."

    # Aggregate the user's profile (weighted by ratings)
    user_profile = np.zeros(cosine_sim.shape[0])
    for _, row in user_books.iterrows():
        book_idx = books.index[books['book_id'] == row['book_id']].tolist()[0]
        user_profile += row['rating'] * cosine_sim[book_idx]

    # Normalize the user profile
    user_profile = user_profile / np.linalg.norm(user_profile)

    return user_profile, None

# Function to get collaborative filtering scores
def get_collaborative_scores(user_id, books, model):
    all_book_ids = books['book_id'].unique()
    rated_books = ratings[ratings['user_id'] == user_id]['book_id'].tolist()

    collaborative_scores = np.zeros(len(books))
    for book_idx, book_id in enumerate(books['book_id']):
        if book_id not in rated_books:
            est_rating = model.predict(user_id, book_id).est
            collaborative_scores[book_idx] = est_rating

    return collaborative_scores

# Weighted hybrid recommendation function
def weighted_hybrid_recommendations(user_id, n_recommendations, books, ratings, cosine_sim, model, weight=0.5):
    content_scores, content_error = get_content_based_scores(user_id, books, ratings, cosine_sim)
    if content_error:
        return content_error
    
    collaborative_scores = get_collaborative_scores(user_id, books, model)
    
    # Combine the scores using a weighted sum
    hybrid_scores = weight * content_scores + (1 - weight) * collaborative_scores
    
    # Get top n_recommendations by sorting the scores
    top_indices = np.argsort(hybrid_scores)[-n_recommendations:][::-1]
    recommended_titles = books.iloc[top_indices]['title'].tolist()

    return recommended_titles

# Example usage
user_id = int(input("Enter user id: "))
n_recommendations = int(input("Enter the number of recommendations you want? "))
recommended_titles = weighted_hybrid_recommendations(
    user_id, n_recommendations, books, ratings, cosine_sim, loaded_svd, weight=0.5
)

print(f"Top recommended Books for User {user_id}:")
for i, title in enumerate(recommended_titles, 1):
    print(f"{i}. {title}")

Enter user id:  2
Enter the number of recommendations you want?  7


Top recommended Books for User 2:
1. The Harry Potter Collection 1-4 (Harry Potter, #1-4)
2. Harry Potter Boxset (Harry Potter, #1-7)
3. Harry Potter Boxed Set, Books 1-5 (Harry Potter, #1-5)
4. Harry Potter and the Order of the Phoenix (Harry Potter, #5, Part 1)
5. Harry Potter Page to Screen: The Complete Filmmaking Journey
6. The Ultimate Hitchhiker's Guide to the Galaxy
7. The Complete Works


## Deep Learning Model

### Predicting Rating

In [88]:
# Create input features and target
X = np.random.rand(len(ratings), len(books.columns) + 1)  # Adjust this to include relevant features
y = ratings['rating'].values

In [32]:
# Define the model
model = Sequential()
model.add(Dense(256, activation='relu', input_shape=(X.shape[1],)))
model.add(Dropout(0.2)) #method for regularization of dl models
model.add(Dense(128, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(1))  # Output layer for regression task

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [33]:
# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
model.fit(X, y, epochs=10, batch_size=32, validation_split=0.2)

Epoch 1/10
[1m149412/149412[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m286s[0m 2ms/step - loss: 1.0155 - val_loss: 0.9324
Epoch 2/10
[1m149412/149412[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m289s[0m 2ms/step - loss: 0.9970 - val_loss: 0.9337
Epoch 3/10
[1m149412/149412[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m308s[0m 2ms/step - loss: 0.9946 - val_loss: 0.9321
Epoch 4/10
[1m149412/149412[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m273s[0m 2ms/step - loss: 0.9951 - val_loss: 0.9327
Epoch 5/10
[1m149412/149412[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m236s[0m 2ms/step - loss: 0.9966 - val_loss: 0.9321
Epoch 6/10
[1m149412/149412[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m264s[0m 2ms/step - loss: 0.9958 - val_loss: 0.9329
Epoch 7/10
[1m149412/149412[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m261s[0m 2ms/step - loss: 0.9961 - val_loss: 0.9348
Epoch 8/10
[1m149412/149412[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m238s[0m 2ms/step - loss: 0.

<keras.src.callbacks.history.History at 0x29b51713ed0>

In [34]:
# Function to predict ratings using the deep learning model
def predict_rating_dl(input_features):
    return model.predict(np.array(input_features).reshape(1, -1))

In [35]:
predicted_rating_dl = predict_rating_dl(X[0])
print(f'Predicted Rating (DL): {predicted_rating_dl[0][0]}')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 80ms/step
Predicted Rating (DL): 3.892058849334717


### Predicting Books

In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from keras.models import Model
from keras.layers import Input, Embedding, Flatten, Dense, Concatenate
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint

In [19]:
num_users = ratings['user_id'].nunique()
num_books = ratings['book_id'].nunique()

# Prepare training and testing data
train_ratings, test_ratings = train_test_split(ratings, test_size=0.2, random_state=42)

In [25]:
# Define the model
def build_model(num_users, num_books, embedding_size=50):
    # User and Book Input Layers
    user_input = Input(shape=(1,), name='user_input')
    book_input = Input(shape=(1,), name='book_input')

    # User and Book Embeddings
    user_embedding = Embedding(input_dim=num_users, output_dim=embedding_size, name='user_embedding')(user_input)
    book_embedding = Embedding(input_dim=num_books, output_dim=embedding_size, name='book_embedding')(book_input)

    # Flatten the embeddings
    user_vecs = Flatten()(user_embedding)
    book_vecs = Flatten()(book_embedding)

    # Concatenate user and book embeddings
    input_vecs = Concatenate()([user_vecs, book_vecs])

    # Add Dense layers
    x = Dense(128, activation='relu')(input_vecs)
    x = Dense(64, activation='relu')(x)
    x = Dense(32, activation='relu')(x)
    y = Dense(1)(x)

    # Build and compile the model
    model = Model(inputs=[user_input, book_input], outputs=y)
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
    
    return model

# Build the model
model = build_model(num_users, num_books)

# Define a checkpoint callback to save the model
checkpoint = ModelCheckpoint('deep_model.keras', save_best_only=True, monitor='val_loss', mode='min')

In [29]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from keras.models import Model
from keras.layers import Input, Embedding, Flatten, Dense, Concatenate
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint

# Create mapping dictionaries for user_ids and book_ids
user_mapping = {id_: i for i, id_ in enumerate(ratings['user_id'].unique())}
book_mapping = {id_: i for i, id_ in enumerate(books['book_id'].unique())}

# Create reverse mappings (for converting back to original IDs)
reverse_user_mapping = {v: k for k, v in user_mapping.items()}
reverse_book_mapping = {v: k for k, v in book_mapping.items()}

# Map the IDs to sequential integers
ratings['user_id_mapped'] = ratings['user_id'].map(user_mapping)
ratings['book_id_mapped'] = ratings['book_id'].map(book_mapping)

# Define the number of unique users and books
num_users = len(user_mapping)
num_books = len(book_mapping)

# Prepare training and testing data
train_ratings, test_ratings = train_test_split(ratings, test_size=0.2, random_state=42)

# Define the model
def build_model(num_users, num_books, embedding_size=50):
    # User and Book Input Layers
    user_input = Input(shape=(1,), name='user_input')
    book_input = Input(shape=(1,), name='book_input')

    # User and Book Embeddings
    user_embedding = Embedding(input_dim=num_users, output_dim=embedding_size, name='user_embedding')(user_input)
    book_embedding = Embedding(input_dim=num_books, output_dim=embedding_size, name='book_embedding')(book_input)

    # Flatten the embeddings
    user_vecs = Flatten()(user_embedding)
    book_vecs = Flatten()(book_embedding)

    # Concatenate user and book embeddings
    input_vecs = Concatenate()([user_vecs, book_vecs])

    # Add Dense layers
    x = Dense(128, activation='relu')(input_vecs)
    x = Dense(64, activation='relu')(x)
    x = Dense(32, activation='relu')(x)
    y = Dense(1)(x)

    # Build and compile the model
    model = Model(inputs=[user_input, book_input], outputs=y)
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
    
    return model

# Build the model
model = build_model(num_users, num_books)

# Define a checkpoint callback to save the model
checkpoint = ModelCheckpoint('deep_model.keras', save_best_only=True, monitor='val_loss', mode='min')

# Train the model using mapped IDs
model.fit(
    [train_ratings['user_id_mapped'].values, train_ratings['book_id_mapped'].values],
    train_ratings['rating'].values,
    validation_data=(
        [test_ratings['user_id_mapped'].values, test_ratings['book_id_mapped'].values],
        test_ratings['rating'].values
    ),
    epochs=10,
    batch_size=64,
    callbacks=[checkpoint]
)

Epoch 1/10




[1m74706/74706[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1391s[0m 19ms/step - loss: 0.8576 - val_loss: 0.7279
Epoch 2/10
[1m74706/74706[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1653s[0m 22ms/step - loss: 0.7078 - val_loss: 0.6997
Epoch 3/10
[1m74706/74706[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1646s[0m 22ms/step - loss: 0.6607 - val_loss: 0.6891
Epoch 4/10
[1m74706/74706[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1414s[0m 19ms/step - loss: 0.6290 - val_loss: 0.6811
Epoch 5/10
[1m74706/74706[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1331s[0m 18ms/step - loss: 0.5984 - val_loss: 0.6797
Epoch 6/10
[1m74706/74706[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1324s[0m 18ms/step - loss: 0.5689 - val_loss: 0.6827
Epoch 7/10
[1m74706/74706[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1316s[0m 18ms/step - loss: 0.5397 - val_loss: 0.6903
Epoch 8/10
[1m74706/74706[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1324s[0m 18ms/step - loss: 0.5147 - val_

Enter user id:  4
Enter the number of recommendations you want?  2



Top recommended Books for User 4:
1. Harry Potter Boxset (Harry Potter, #1-7) (Predicted Rating: 5.05)
2. Harry Potter Boxed Set, Books 1-5 (Harry Potter, #1-5) (Predicted Rating: 5.02)


In [31]:
# Function to recommend books for a user
def recommend_books_deep(user_id, books, model, n_recommendations=5):
    # Map the user_id to the sequential ID
    mapped_user_id = user_mapping.get(user_id)
    if mapped_user_id is None:
        return "User ID not found in training data"

    all_book_ids = books['book_id'].unique()
    rated_books = ratings[ratings['user_id'] == user_id]['book_id'].tolist()

    book_predictions = []
    for book_id in all_book_ids:
        if book_id not in rated_books:
            mapped_book_id = book_mapping.get(book_id)
            if mapped_book_id is not None:
                pred_rating = model.predict(
                    [np.array([mapped_user_id]), np.array([mapped_book_id])],
                    verbose=0
                )[0][0]
                book_predictions.append((book_id, pred_rating))

    book_predictions.sort(key=lambda x: x[1], reverse=True)
    top_recommendations = book_predictions[:n_recommendations]

    recommended_titles = []
    for book_id, pred_rating in top_recommendations:
        title = books[books['book_id'] == book_id]['title'].values[0]
        recommended_titles.append((title, pred_rating))

    return recommended_titles

In [33]:
user_id = int(input("Enter user id: "))
n_recommendations = int(input("Enter the number of recommendations you want? "))
recommended_titles = recommend_books_deep(user_id, books, model, n_recommendations)

print(f"\nTop recommended Books for User {user_id}:")
if isinstance(recommended_titles, str):
    print(recommended_titles)
else:
    for i, (title, rating) in enumerate(recommended_titles, 1):
        print(f"{i}. {title} (Predicted Rating: {rating:.2f})")



Enter user id:  2
Enter the number of recommendations you want?  4



KeyboardInterrupt



In [None]:
with open('id_mappings.pkl', 'wb') as f:
    pickle.dump(mapping_data, f)