# In memory collaboarative filtering

### works better with full interactions. (Unread book ratings included)

In [287]:
import pandas as pd
import numpy as np
# Load the CSV files
book_id_map_df = pd.read_csv("./data/book_id_map.csv")
book_works_df = pd.read_csv("./data/book_works.csv")

book_id_map = dict(zip(book_id_map_df['book_id_csv'], book_id_map_df['book_id']))

def get_work_id(book_id):
    return book_id_map[book_id]

# Define the function to get original title by work_id
def get_original_title_by_book_id(work_id, book_works_df):
    # Find the row with the matching best_book_id
    match = book_works_df[book_works_df['best_book_id'] == work_id]
    
    # If a match is found, return the original title
    if not match.empty:
        return match['original_title'].values[0]
    else:
        return None

## User - User CF

In [288]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

def normalize_ratings(user_item_matrix):
    user_means = user_item_matrix.mean(axis=1).values.reshape(-1, 1)
    user_stds = user_item_matrix.std(axis=1).replace(0, 1).values.reshape(-1, 1)  # Replace 0 std with 1 to avoid division by zero
    normalized_matrix = (user_item_matrix - user_means) / user_stds
    return normalized_matrix, user_means, user_stds

def predict_ratings(user_item_matrix, user_similarity_df, user_means, user_stds):
    ratings_diff = user_item_matrix
    pred = user_means + user_similarity_df.dot(ratings_diff) / np.array([np.abs(user_similarity_df).sum(axis=1)]).T
    pred = pred * user_stds  # Scale predictions back to original scale
    return pred

def get_user_based_recommendations(interactions, new_user_ratings, n=10):
    """
    Generate user-based collaborative filtering recommendations for a new user.

    Parameters:
    - interactions (pd.DataFrame): Existing user-item interactions DataFrame.
    - new_user_ratings (pd.DataFrame): New user's ratings DataFrame with columns ['user_id', 'book_id', 'rating'].
    - n (int): Number of recommendations to generate.

    Returns:
    - list: Top-N recommended book IDs for the new user.
    """

    def get_top_n_recommendations(predicted_ratings_df, user_id, n=10):
        user_pred_ratings = predicted_ratings_df.loc[user_id]
        user_pred_ratings_sorted = user_pred_ratings.sort_values(ascending=False)
        top_n_books = user_pred_ratings_sorted.index[:n]
        top_n_scores = user_pred_ratings_sorted.values[:n]
        return top_n_books, top_n_scores

    # Append the new user's ratings to the interactions DataFrame
    updated_interactions = pd.concat([interactions, new_user_ratings], ignore_index=True)

    # Create the updated user-item interaction matrix
    updated_user_item_matrix = updated_interactions.pivot(index='user_id', columns='book_id', values='rating')
    updated_user_item_matrix.fillna(0, inplace=True)
    
    # Normalize the ratings
    normalized_matrix, user_means, user_stds = normalize_ratings(updated_user_item_matrix)

    # Check for NaN or infinity values and handle them
    normalized_matrix = np.nan_to_num(normalized_matrix, nan=0.0, posinf=0.0, neginf=0.0)
    
    # Compute the user similarity matrix using Pearson correlation
    user_similarity = cosine_similarity(normalized_matrix)
    user_similarity_df = pd.DataFrame(user_similarity, index=updated_user_item_matrix.index, columns=updated_user_item_matrix.index)
    
    # Predict ratings for the updated user-item matrix
    predicted_ratings = predict_ratings(normalized_matrix, user_similarity_df, user_means, user_stds)
    predicted_ratings_df = pd.DataFrame(predicted_ratings, index=updated_user_item_matrix.index, columns=updated_user_item_matrix.columns)
    
    # Get top-N recommendations for the new user
    new_user_id = new_user_ratings['user_id'].iloc[0]
    top_n_recommendations, top_n_scores = get_top_n_recommendations(predicted_ratings_df, new_user_id, n)

    return top_n_recommendations.tolist(), top_n_scores.tolist()

In [296]:
# New user's ratings DataFrame
new_user_ratings = pd.DataFrame({
    'user_id': 999999999,  # Replace with the new user's ID
    'book_id': [7300, 1201, 100385, 530615, 48625, 14870, 7170, 19782, 1146577],
    'rating': [5, 4, 3, 5, 1, 3, 2, 5, 1]
})

In [297]:
new_user_books = [7300, 1201, 100385, 530615, 48625, 14870, 7170, 19782, 1146577]

for book_id in new_user_books:
    work_id = get_work_id(book_id)
    print(get_original_title_by_book_id(work_id, book_works_df))

The Bell Jar
Little Women
Cobalt Blue: The Novel
The Sins of the Father
Call Me by Your Name
The Pact
The Picture of Dorian Gray
Sula
Never Let Me Go


In [306]:
# Example usage:
# Existing interactions DataFrame
interactions = pd.read_csv("./data/interactions.csv")

# Get user-based CF recommendations
top_n_recommendations_user_based, top_n_scores_user_based = get_user_based_recommendations(interactions, new_user_ratings, n=30)
print(f"Top-10 user-based recommendations for the new user: {top_n_recommendations_user_based}")

Top-10 user-based recommendations for the new user: [7300, 19509, 19506, 1201, 14870, 19508, 7170, 19510, 19507, 7432, 1012, 1003, 1402, 1211, 786, 943, 670, 536, 1013, 1574, 862, 460, 1000, 613, 1007, 1386, 941, 839, 938, 1010]


## Item - Item CF

In [292]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

def get_item_based_recommendations(interactions, new_user_ratings, n=10):
    """
    Generate item-based collaborative filtering recommendations for a new user.

    Parameters:
    - interactions (pd.DataFrame): Existing user-item interactions DataFrame.
    - new_user_ratings (pd.DataFrame): New user's ratings DataFrame with columns ['user_id', 'book_id', 'rating'].
    - n (int): Number of recommendations to generate.

    Returns:
    - list: Top-N recommended book IDs for the new user.
    """

    def predict_ratings_item_based(user_item_matrix, item_similarity_df):
        mean_user_rating = user_item_matrix.mean(axis=1)
        ratings_diff = user_item_matrix - mean_user_rating[:, np.newaxis]
        sum_sim = np.array([np.abs(item_similarity_df).sum(axis=1)])
        sum_sim[sum_sim == 0] = 1e-9  # To avoid division by zero
        pred = mean_user_rating[:, np.newaxis] + ratings_diff.dot(item_similarity_df) / sum_sim
        return pred

    def get_top_n_recommendations(predicted_ratings_df, user_id, n=10):
        user_pred_ratings = predicted_ratings_df.loc[user_id]
        user_pred_ratings_sorted = user_pred_ratings.sort_values(ascending=False)
        top_n_books = user_pred_ratings_sorted.index[:n]
        return top_n_books

    # Append the new user's ratings to the interactions DataFrame
    updated_interactions = pd.concat([interactions, new_user_ratings], ignore_index=True)

    # Create the updated user-item interaction matrix
    updated_user_item_matrix = updated_interactions.pivot(index='user_id', columns='book_id', values='rating')
    updated_user_item_matrix.fillna(0, inplace=True)

    # Compute the updated item similarity matrix
    updated_item_similarity = cosine_similarity(updated_user_item_matrix.T)
    updated_item_similarity_df = pd.DataFrame(updated_item_similarity, index=updated_user_item_matrix.columns, columns=updated_user_item_matrix.columns)

    # Predict ratings for the updated user-item matrix
    updated_predicted_ratings_item_based = predict_ratings_item_based(updated_user_item_matrix.values, updated_item_similarity_df.values)
    updated_predicted_ratings_item_based_df = pd.DataFrame(updated_predicted_ratings_item_based, index=updated_user_item_matrix.index, columns=updated_user_item_matrix.columns)

    # Get top-N recommendations for the new user
    new_user_id = new_user_ratings['user_id'].iloc[0]
    top_n_recommendations = get_top_n_recommendations(updated_predicted_ratings_item_based_df, new_user_id, n)

    return top_n_recommendations.tolist()

# Example usage:
# Existing interactions DataFrame
interactions = pd.read_csv("./data/interactions.csv")

# Get item-based CF recommendations
top_n_recommendations_item_based = get_item_based_recommendations(interactions, new_user_ratings, n=30)
print(f"Top-10 item-based recommendations for the new user: {top_n_recommendations_item_based}")

Top-10 item-based recommendations for the new user: [1146577, 530615, 100385, 48625, 19782, 14870, 7300, 7170, 1201, 13293, 13311, 13310, 13309, 13304, 13302, 13298, 13296, 13294, 13281, 13289, 13283, 13282, 13333, 13276, 13275, 13273, 13266, 13258, 13235, 6508]


In [293]:
for book_id in new_user_books:
    work_id = get_work_id(book_id)
    print(get_original_title_by_book_id(work_id, book_works_df))

The Bell Jar
Little Women
Cobalt Blue: The Novel
The Sins of the Father
Call Me by Your Name
The Pact
The Picture of Dorian Gray
Sula
Never Let Me Go


### user-user collaborative filtering results

In [307]:
recommended_books = top_n_recommendations_user_based

count = 0
for book_id in recommended_books:
    work_id = get_work_id(book_id)
    title = get_original_title_by_book_id(work_id, book_works_df)
    if title == None or title == "nan" :
        count+=1
        continue
    print(title)
    count+=1

The Bell Jar
Breathing Underwater
nan
Little Women
The Pact
Snobs
The Picture of Dorian Gray
Dragon Spear
Mark of the Thief
Gone with the Wind (1 vol.)
Pride and Prejudice
The Book Thief
Water for Elephants
To Kill a Mockingbird
Harry Potter and the Philosopher's Stone
The Fellowship of the Ring
The Hunger Games
The Kite Runner
The Giver
The Catcher in the Rye
The Secret Garden
Twilight
Harry Potter and the Deathly Hallows
The Help
Mockingjay
Harry Potter and the Prisoner of Azkaban
Lord of the Flies
Harry Potter and the Goblet of Fire
The Curious Incident of the Dog in the Night-Time


## item-item collaborative filtering

In [295]:
recommended_books = top_n_recommendations_item_based

for book_id in recommended_books:
    work_id = get_work_id(book_id)
    title = get_original_title_by_book_id(work_id, book_works_df)
    if title == None or title == "nan" :
        continue
    print(title)

Never Let Me Go
The Sins of the Father
Cobalt Blue: The Novel
Call Me by Your Name
Sula
The Pact
The Bell Jar
The Picture of Dorian Gray
Little Women
nan
Killing Jesus: A History
The Mermaid's Sister


Angel Time
My Sunshine Away
Of Love and Evil
The Girl from Krakow
Thor, Volume 2: Who Holds the Hammer?
nan
Pines
Thor, Volume 1: The Goddess of Thunder
The Awakening
Dear Ijeawele, Or a Feminist Manifesto in Fifteen Suggestions
nan
One Day We'll All Be Dead and None of This Will Matter
Since We Fell
The Wolf Gift
