# USER BASED CF

In [1]:
import pandas as pd
import numpy as np

# Assuming 'interactions' is your filtered DataFrame
interactions = pd.read_csv("./data/filtered_interactions.csv")

# Create the user-item interaction matrix
user_item_matrix = interactions.pivot(index='user_id', columns='book_id', values='rating')
user_item_matrix.fillna(0, inplace=True)

In [2]:
from sklearn.metrics.pairwise import cosine_similarity

# Compute the user similarity matrix
user_similarity = cosine_similarity(user_item_matrix)
user_similarity_df = pd.DataFrame(user_similarity, index=user_item_matrix.index, columns=user_item_matrix.index)

In [3]:
def predict_ratings(user_item_matrix, user_similarity_df):
    mean_user_rating = user_item_matrix.mean(axis=1)
    ratings_diff = (user_item_matrix.T - mean_user_rating).T
    pred = mean_user_rating[:, np.newaxis] + user_similarity_df.dot(ratings_diff) / np.array([np.abs(user_similarity_df).sum(axis=1)]).T
    return pred

# Predict ratings
predicted_ratings = predict_ratings(user_item_matrix.values, user_similarity)
predicted_ratings_df = pd.DataFrame(predicted_ratings, index=user_item_matrix.index, columns=user_item_matrix.columns)

In [4]:
def get_top_n_recommendations(predicted_ratings_df, user_id, n=10):
    # Get the user's predicted ratings
    user_pred_ratings = predicted_ratings_df.loc[user_id]
    
    # Sort the predicted ratings in descending order
    user_pred_ratings_sorted = user_pred_ratings.sort_values(ascending=False)
    
    # Get the book IDs of the top N predictions
    top_n_books = user_pred_ratings_sorted.index[:n]
    return top_n_books

# Example: Get top-10 recommendations for a specific user (user_id=1)
user_id = 1
top_n_recommendations = get_top_n_recommendations(predicted_ratings_df, user_id, n=10)
print(f"Top-10 recommendations for user {user_id}: {top_n_recommendations.tolist()}")

Top-10 recommendations for user 1: [943, 613, 941, 939, 938, 968, 670, 944, 772, 858]


# ITEM BASED CF

In [5]:
from sklearn.metrics.pairwise import cosine_similarity

# Compute the item similarity matrix
item_similarity = cosine_similarity(user_item_matrix.T)
item_similarity_df = pd.DataFrame(item_similarity, index=user_item_matrix.columns, columns=user_item_matrix.columns)

In [6]:
def predict_ratings_item_based(user_item_matrix, item_similarity_df):
    mean_user_rating = user_item_matrix.mean(axis=1)
    ratings_diff = user_item_matrix - mean_user_rating[:, np.newaxis]
    pred = mean_user_rating[:, np.newaxis] + ratings_diff.dot(item_similarity_df) / np.array([np.abs(item_similarity_df).sum(axis=1)])
    return pred

# Predict ratings
predicted_ratings_item_based = predict_ratings_item_based(user_item_matrix.values, item_similarity_df.values)
predicted_ratings_item_based_df = pd.DataFrame(predicted_ratings_item_based, index=user_item_matrix.index, columns=user_item_matrix.columns)

In [7]:
def get_top_n_recommendations_item_based(predicted_ratings_df, user_id, n=10):
    # Get the user's predicted ratings
    user_pred_ratings = predicted_ratings_df.loc[user_id]
    
    # Sort the predicted ratings in descending order
    user_pred_ratings_sorted = user_pred_ratings.sort_values(ascending=False)
    
    # Get the book IDs of the top N predictions
    top_n_books = user_pred_ratings_sorted.index[:n]
    return top_n_books

# Example: Get top-10 recommendations for a specific user (user_id=1)
user_id = 1
top_n_recommendations_item_based = get_top_n_recommendations_item_based(predicted_ratings_item_based_df, user_id, n=10)
print(f"Top-10 recommendations for user {user_id} (item-based): {top_n_recommendations_item_based.tolist()}")

Top-10 recommendations for user 1 (item-based): [1042, 983, 1036, 972, 973, 974, 1040, 949, 1043, 975]


# NEW USER

In [8]:
import pandas as pd
import numpy as np
# Load the CSV files
book_id_map_df = pd.read_csv("./data/book_id_map.csv")
book_works_df = pd.read_csv("./data/book_works.csv")

book_id_map = dict(zip(book_id_map_df['book_id_csv'], book_id_map_df['book_id']))

def get_work_id(book_id):
    return book_id_map[book_id]

# Define the function to get original title by work_id
def get_original_title_by_book_id(work_id, book_works_df):
    # Find the row with the matching best_book_id
    match = book_works_df[book_works_df['best_book_id'] == work_id]
    
    # If a match is found, return the original title
    if not match.empty:
        return match['original_title'].values[0]
    else:
        return None

In [9]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

def get_user_based_recommendations(interactions, new_user_ratings, n=10):
    """
    Generate user-based collaborative filtering recommendations for a new user.

    Parameters:
    - interactions (pd.DataFrame): Existing user-item interactions DataFrame.
    - new_user_ratings (pd.DataFrame): New user's ratings DataFrame with columns ['user_id', 'book_id', 'rating'].
    - n (int): Number of recommendations to generate.

    Returns:
    - list: Top-N recommended book IDs for the new user.
    """

    def predict_ratings(user_item_matrix, user_similarity_df):
        mean_user_rating = user_item_matrix.mean(axis=1)
        ratings_diff = (user_item_matrix.T - mean_user_rating).T
        pred = mean_user_rating[:, np.newaxis] + user_similarity_df.dot(ratings_diff) / np.array([np.abs(user_similarity_df).sum(axis=1)]).T
        return pred

    def get_top_n_recommendations(predicted_ratings_df, user_id, n=10):
        user_pred_ratings = predicted_ratings_df.loc[user_id]
        user_pred_ratings_sorted = user_pred_ratings.sort_values(ascending=False)
        top_n_books = user_pred_ratings_sorted.index[:n]
        return top_n_books

    # Append the new user's ratings to the interactions DataFrame
    updated_interactions = pd.concat([interactions, new_user_ratings], ignore_index=True)

    # Create the updated user-item interaction matrix
    updated_user_item_matrix = updated_interactions.pivot(index='user_id', columns='book_id', values='rating')
    updated_user_item_matrix.fillna(0, inplace=True)

    # Compute the updated user similarity matrix
    updated_user_similarity = cosine_similarity(updated_user_item_matrix)
    updated_user_similarity_df = pd.DataFrame(updated_user_similarity, index=updated_user_item_matrix.index, columns=updated_user_item_matrix.index)

    # Predict ratings for the updated user-item matrix
    updated_predicted_ratings = predict_ratings(updated_user_item_matrix.values, updated_user_similarity_df)
    updated_predicted_ratings_df = pd.DataFrame(updated_predicted_ratings, index=updated_user_item_matrix.index, columns=updated_user_item_matrix.columns)

    # Get top-N recommendations for the new user
    new_user_id = new_user_ratings['user_id'].iloc[0]
    top_n_recommendations = get_top_n_recommendations(updated_predicted_ratings_df, new_user_id, n)

    return top_n_recommendations.tolist()

In [10]:
# New user's ratings DataFrame
new_user_ratings = pd.DataFrame({
    'user_id': 999999999,  # Replace with the new user's ID
    'book_id': [7300, 1201, 7057, 530615, 943],  # Replace with the rated book IDs
    'rating': [5, 1, 1, 0, 5]  # Replace with the ratings
})

In [18]:
# Example usage:
# Existing interactions DataFrame
interactions = pd.read_csv("./data/interactions.csv")

# Get user-based CF recommendations
top_n_recommendations_user_based = get_user_based_recommendations(interactions, new_user_ratings, n=30)
print(f"Top-10 user-based recommendations for the new user: {top_n_recommendations_user_based}")

Top-10 user-based recommendations for the new user: [943, 7300, 939, 941, 536, 938, 613, 968, 944, 1387, 670, 862, 786, 1386, 1201, 997, 1574, 772, 1000, 524, 7351, 1013, 858, 1003, 996, 461, 1012, 1002, 1203, 821]


In [12]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

def get_item_based_recommendations(interactions, new_user_ratings, n=10):
    """
    Generate item-based collaborative filtering recommendations for a new user.

    Parameters:
    - interactions (pd.DataFrame): Existing user-item interactions DataFrame.
    - new_user_ratings (pd.DataFrame): New user's ratings DataFrame with columns ['user_id', 'book_id', 'rating'].
    - n (int): Number of recommendations to generate.

    Returns:
    - list: Top-N recommended book IDs for the new user.
    """

    def predict_ratings_item_based(user_item_matrix, item_similarity_df):
        mean_user_rating = user_item_matrix.mean(axis=1)
        ratings_diff = user_item_matrix - mean_user_rating[:, np.newaxis]
        pred = mean_user_rating[:, np.newaxis] + ratings_diff.dot(item_similarity_df) / np.array([np.abs(item_similarity_df).sum(axis=1)])
        return pred

    def get_top_n_recommendations(predicted_ratings_df, user_id, n=10):
        user_pred_ratings = predicted_ratings_df.loc[user_id]
        user_pred_ratings_sorted = user_pred_ratings.sort_values(ascending=False)
        top_n_books = user_pred_ratings_sorted.index[:n]
        return top_n_books

    # Append the new user's ratings to the interactions DataFrame
    updated_interactions = pd.concat([interactions, new_user_ratings], ignore_index=True)

    # Create the updated user-item interaction matrix
    updated_user_item_matrix = updated_interactions.pivot(index='user_id', columns='book_id', values='rating')
    updated_user_item_matrix.fillna(0, inplace=True)

    # Compute the updated item similarity matrix
    updated_item_similarity = cosine_similarity(updated_user_item_matrix.T)
    updated_item_similarity_df = pd.DataFrame(updated_item_similarity, index=updated_user_item_matrix.columns, columns=updated_user_item_matrix.columns)

    # Predict ratings for the updated user-item matrix
    updated_predicted_ratings_item_based = predict_ratings_item_based(updated_user_item_matrix.values, updated_item_similarity_df.values)
    updated_predicted_ratings_item_based_df = pd.DataFrame(updated_predicted_ratings_item_based, index=updated_user_item_matrix.index, columns=updated_user_item_matrix.columns)

    # Get top-N recommendations for the new user
    new_user_id = new_user_ratings['user_id'].iloc[0]
    top_n_recommendations = get_top_n_recommendations(updated_predicted_ratings_item_based_df, new_user_id, n)

    return top_n_recommendations.tolist()

# Example usage:
# Existing interactions DataFrame
interactions = pd.read_csv("./data/interactions.csv")

# Get item-based CF recommendations
top_n_recommendations_item_based = get_item_based_recommendations(interactions, new_user_ratings, n=30)
print(f"Top-10 item-based recommendations for the new user: {top_n_recommendations_item_based}")

Top-10 item-based recommendations for the new user: [8100, 8092, 8099, 8102, 6891, 6890, 6889, 6888, 5767, 6838, 8096, 6182, 8083, 8077, 7006, 6183, 6184, 6185, 6186, 6304, 6412, 7300, 15441, 12894, 15426, 15428, 15437, 15429, 15449, 15465]


  pred = mean_user_rating[:, np.newaxis] + ratings_diff.dot(item_similarity_df) / np.array([np.abs(item_similarity_df).sum(axis=1)])


In [13]:
new_user_books = [7300, 1201, 530615, 943]

for book_id in new_user_books:
    work_id = get_work_id(book_id)
    print(get_original_title_by_book_id(work_id, book_works_df))

The Bell Jar
Little Women
The Sins of the Father
Harry Potter and the Philosopher's Stone


In [19]:
recommended_books = top_n_recommendations_user_based

for book_id in recommended_books:
    work_id = get_work_id(book_id)
    title = get_original_title_by_book_id(work_id, book_works_df)
    if title == None or title == "nan" :
        continue
    print(title)

Harry Potter and the Philosopher's Stone
The Bell Jar
Harry Potter and the Order of the Phoenix
Harry Potter and the Prisoner of Azkaban
The Hunger Games
Harry Potter and the Goblet of Fire
Harry Potter and the Deathly Hallows
Harry Potter and the Chamber of Secrets
Harry Potter and the Half-Blood Prince
Catching Fire
The Fellowship of the Ring
The Catcher in the Rye
To Kill a Mockingbird
Mockingjay
Little Women
The Lion, the Witch and the Wardrobe
The Giver
The Hobbit : or There and Back Again
Twilight
Man som hatar kvinnor
Un di Velt Hot Geshvign
The Kite Runner
The Great Gatsby
A Game of Thrones
Animal Farm: A Fairy Story
Pride and Prejudice
Jane Eyre
The Da Vinci code
Nineteen Eighty-Four


In [15]:
recommended_books = top_n_recommendations_item_based

for book_id in recommended_books:
    work_id = get_work_id(book_id)
    title = get_original_title_by_book_id(work_id, book_works_df)
    if title == None or title == "nan" :
        continue
    print(title)

Lips Touch: Three Times
The Social Animal: The Hidden Sources of Love, Character, and Achievement
The Mortal Instrument Series: City of Bones / City of Ashes / City of Glass / City of Fallen Angels
Reckless Magic (Star-Crossed #1)
Hopeless Magic
nan
nan
nan
Incarceron
Dreamfever
Shadowfever
Faefever
Bloodfever
Darkfever
Fury
Easy
The Bell Jar
109 East Palace: Robert Oppenheimer and the Secret City of Los Alamos
The Thirteenth Tale
The Widow's War
The Third Twin
Many Lives, Many Masters
Me & Emma
The General's Daughter
