In [5]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [6]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Flatten, Dense, Dot, Add
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2

In [7]:
# -------------------------------------------- Bucket MOOD : books_with_rats_moods.csv ----------------------------------------------------- #
# Custom function to handle unseen labels
def transform_with_fallback(encoder, labels, default='unknown'):
    if not hasattr(encoder, 'classes_'):
        raise ValueError("This LabelEncoder instance is not fitted yet.")
    unseen_labels = [label for label in labels if label not in encoder.classes_]
    if unseen_labels:
        if default not in encoder.classes_:
            encoder.classes_ = np.append(encoder.classes_, default)
        return encoder.transform([default if label in unseen_labels else label for label in labels])
    else:
        return encoder.transform(labels)

In [8]:
# Load the dataset
merged_books_ratings = pd.read_csv('/content/drive/MyDrive/ISR Project /Project/books_with_rats_moods.csv')
print(merged_books_ratings.columns)

Index(['Unnamed: 0', 'Book', 'Author', 'Description', 'Genres',
       'Year of Publication', 'Publisher_x', 'URL', 'Aggregated Emotions',
       'Aggregated Des Emotions', 'ISBN', 'Book-Title', 'Book-Author',
       'Year-Of-Publication', 'Publisher_y', 'Image-URL-S', 'Image-URL-M',
       'Image-URL-L', 'User-ID', 'Book-Rating', 'Sorted Buckets',
       'Sorted Buckets desc', 'Total Buckets', 'Max Mood'],
      dtype='object')


In [9]:
# Encoding user IDs, book IDs, and Moods
user_encoder = LabelEncoder()
book_encoder = LabelEncoder()
mood_encoder = LabelEncoder()

merged_books_ratings['User-ID'] = merged_books_ratings['User-ID'].astype(str)

merged_books_ratings['user_id_encoded'] = user_encoder.fit_transform(merged_books_ratings['User-ID'])
merged_books_ratings['book_id_encoded'] = book_encoder.fit_transform(merged_books_ratings['Book'])
#mood_encoder.fit(list(merged_books_ratings['Max Mood']) + ['unknown'])  # Including 'unknown' label
mood_encoder.fit([m.lower() for m in merged_books_ratings['Max Mood']] + ['unknown'])
#merged_books_ratings['mood_encoded'] = transform_with_fallback(mood_encoder, merged_books_ratings['Max Mood'])
merged_books_ratings['mood_encoded'] = transform_with_fallback(mood_encoder, [m.lower() for m in merged_books_ratings['Max Mood']], default='unknown')

print("Mood classes after fitting:", mood_encoder.classes_)
print("User ID classes in encoder:", user_encoder.classes_)
print("User ID '26' in encoder classes:", '26' in user_encoder.classes_)
#print("Unique moods in dataset:", merged_books_ratings['Max Mood'].unique())

Mood classes after fitting: ['fearful' 'joyful' 'melancholic' 'motivational' 'romantic' 'unknown']
User ID classes in encoder: ['100004' '100009' '100010' ... '99980' '99996' '99997']
User ID '26' in encoder classes: True


In [10]:
# Normalize ratings
merged_books_ratings['Book-Rating'] = merged_books_ratings['Book-Rating'].apply(lambda x: (x - 1) / 9)

In [11]:
# Split the data
train, test = train_test_split(merged_books_ratings, test_size=0.2, random_state=42)

In [12]:
# Model architecture for collaborative filtering
def build_collaborative_filtering_model(num_users, num_books, embedding_size=15):
    user_input = Input(shape=(1,))
    book_input = Input(shape=(1,))
    user_embedding = Embedding(num_users, embedding_size, embeddings_regularizer=l2(1e-6))(user_input)
    book_embedding = Embedding(num_books, embedding_size, embeddings_regularizer=l2(1e-6))(book_input)
    user_vec = Flatten()(user_embedding)
    book_vec = Flatten()(book_embedding)
    dot_product = Dot(axes=1)([user_vec, book_vec])
    user_bias = Flatten()(Embedding(num_users, 1)(user_input))
    book_bias = Flatten()(Embedding(num_books, 1)(book_input))
    sum = Add()([dot_product, user_bias, book_bias])
    model = Model([user_input, book_input], sum)
    model.compile(optimizer=Adam(0.001), loss='mean_squared_error')
    return model

# Build and train the model
model = build_collaborative_filtering_model(len(user_encoder.classes_), len(book_encoder.classes_))
model.fit([train['user_id_encoded'], train['book_id_encoded']], train['Book-Rating'], batch_size=64, epochs=5, validation_split=0.1)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x7a55c80730d0>

In [13]:
# Function to recommend books based on user similarity and mood
def recommend_books(user_id, mood, top_n=5):
    try:
        user_id_str = str(user_id)  # Convert to string to match the encoding
        mood_str = mood.lower()     # Convert to lowercase to match the encoding

        print("Looking for user:", user_id_str)
        print("Looking for mood:", mood_str)

        # Check if the user and mood are in the encoder classes
        if user_id_str not in user_encoder.classes_:
            return f"User ID '{user_id_str}' not found in dataset."
        if mood_str not in mood_encoder.classes_:
            return f"Mood '{mood_str}' not found in dataset."

        user_idx = user_encoder.transform([user_id_str])
        mood_idx = mood_encoder.transform([mood_str])
        print(f"User index: {user_idx}")
        print(f"Mood index: {mood_idx}")

        # Get the valid books for the mood index
        valid_books = merged_books_ratings[merged_books_ratings['mood_encoded'] == mood_idx[0]]['book_id_encoded'].unique()
        #print(f"Valid books indices: {valid_books}")
        print(f"Number of valid books for mood '{mood_str}': {len(valid_books)}")

        if len(valid_books) == 0:
            return f"No books found for mood '{mood_str}'."

        # Predict the ratings for the valid books
        predictions = model.predict([np.array([user_idx[0]] * len(valid_books)), valid_books])
        #print(f"Predictions: {predictions}")

        top_books_idx = predictions.flatten().argsort()[-top_n:][::-1]
        recommended_books = book_encoder.inverse_transform(top_books_idx)

        return recommended_books
    except Exception as e:
        return f"An error occurred: {str(e)}"

In [20]:
# Test the recommendation function
user_id = "26"  # Use a valid user ID for testing
mood = "Melancholic"  # Example mood
#print("Recommended Books:", recommend_books(test_user_id, test_mood))
recommended_books = recommend_books(user_id, mood)
print(recommended_books)

# Fetch the corresponding image URLs from the dataset
image_urls = merged_books_ratings.loc[merged_books_ratings['Book'].isin(recommended_books), 'URL']
#print(image_urls)

# Display the recommendations with images
print(f"Recommendations for User ID {user_id} with mood '{mood}':")
for book, url in zip(recommended_books, image_urls):
    print(f" - {book}")
    print(url)
    print()  # Add space between different recommendations

Looking for user: 26
Looking for mood: melancholic
User index: [10681]
Mood index: [2]
Number of valid books for mood 'melancholic': 503
['dhalgren' 'darkness at noon' 'even cowgirls get the blues'
 'balzac and the little chinese seamstress' '1984']
Recommendations for User ID 26 with mood 'Melancholic':
 - dhalgren
https://www.goodreads.com/book/show/7572.Even_Cowgirls_Get_the_Blues

 - darkness at noon
https://www.goodreads.com/book/show/7572.Even_Cowgirls_Get_the_Blues

 - even cowgirls get the blues
https://www.goodreads.com/book/show/7572.Even_Cowgirls_Get_the_Blues

 - balzac and the little chinese seamstress
https://www.goodreads.com/book/show/7572.Even_Cowgirls_Get_the_Blues

 - 1984
https://www.goodreads.com/book/show/7572.Even_Cowgirls_Get_the_Blues



In [21]:
# Assuming the model and data encoding are already set up as per previous discussions
# Example users and moods for testing
test_cases = [
    {"user_id": str(user_encoder.classes_[0]), "mood": mood_encoder.inverse_transform([0])[0].lower()},  # first user and first mood
    {"user_id": str(user_encoder.classes_[1]), "mood": mood_encoder.inverse_transform([1])[0].lower()},  # second user and second mood
    {"user_id": str(user_encoder.classes_[2]), "mood": mood_encoder.inverse_transform([2])[0].lower()},  # third user and third mood
    {"user_id": str(user_encoder.classes_[3]), "mood": mood_encoder.inverse_transform([3])[0].lower()},  # fourth user and fourth mood
    {"user_id": str(user_encoder.classes_[4]), "mood": mood_encoder.inverse_transform([4])[0].lower()}   # fifth user and fifth mood
]

# Test the recommendation system for each case
for case in test_cases:
    recommended_books = recommend_books(case["user_id"], case["mood"], top_n=5)
    print(f"Recommendations for User ID {case['user_id']} with mood '{case['mood']}':")
    image_urls = merged_books_ratings.loc[merged_books_ratings['Book'].isin(recommended_books), 'URL']
    for book, url in zip(recommended_books, image_urls):
      print(f" - {book}")
      print(url)
    print()  # Just to add space between different test cases

Looking for user: 100004
Looking for mood: fearful
User index: [0]
Mood index: [0]
Number of valid books for mood 'fearful': 50
Recommendations for User ID 100004 with mood 'fearful':
 - a light in the attic
https://www.goodreads.com/book/show/28920.A_Pale_View_of_Hills
 - 2010: odyssey two
https://www.goodreads.com/book/show/5197.A_Lesson_Before_Dying
 - a lesson before dying
https://www.goodreads.com/book/show/5197.A_Lesson_Before_Dying
 - a clockwork orange
https://www.goodreads.com/book/show/5197.A_Lesson_Before_Dying
 - a pale view of hills
https://www.goodreads.com/book/show/5197.A_Lesson_Before_Dying

Looking for user: 100009
Looking for mood: joyful
User index: [1]
Mood index: [1]
Number of valid books for mood 'joyful': 765
Recommendations for User ID 100009 with mood 'joyful':
 - brave new world
https://www.goodreads.com/book/show/95608.Leota_s_Garden
 - crime and punishment
https://www.goodreads.com/book/show/765172.Cane
 - caps for sale: a tale of a peddler, some monkeys an

In [22]:
user_id = "21576"
mood = "romantic"
recommended_books = recommend_books(user_id, mood, top_n=5)
print(f"Recommendations for User ID {user_id} with mood '{mood}':")
for book in recommended_books:
    print(f" - {book}")
print("\n")  # Just to add space between different test cases

Looking for user: 21576
Looking for mood: romantic
User index: [7570]
Mood index: [4]
Number of valid books for mood 'romantic': 90
Recommendations for User ID 21576 with mood 'romantic':
 - always outnumbered, always outgunned
 - a light in the attic
 - 2010: odyssey two
 - alas, babylon
 - a man




In [23]:
import numpy as np
from sklearn.metrics import mean_squared_error
from math import sqrt

# Assuming 'model' is the model you have already loaded and compiled
# Ensure the 'test' DataFrames are prepared as per your split in the model training code

# Predict ratings for the test dataset
predicted_ratings = model.predict([test['user_id_encoded'], test['book_id_encoded']]).flatten()
print(len(predicted_ratings))

# True ratings extracted from the test dataset
true_ratings = test['Book-Rating'].values
print(len(true_ratings))

# Calculate RMSE
rmse = sqrt(mean_squared_error(true_ratings, predicted_ratings))
print("Root Mean Squared Error (RMSE):", rmse)

# Define a function to calculate accuracy based on a threshold
def calculate_accuracy(true_ratings, predicted_ratings, threshold=0.5):
    correct = 0
    for true, pred in zip(true_ratings, predicted_ratings):
        if abs(true - pred) < threshold:
            correct += 1
    return correct / len(true_ratings)

# Calculate and print accuracy
accuracy = calculate_accuracy(true_ratings, predicted_ratings)
print("Accuracy:", accuracy)

9223
9223
Root Mean Squared Error (RMSE): 0.448368017887179
Accuracy: 0.7075788788897321
