# In memory collaboarative filtering

### works better with full interactions. (Unread book ratings included)

In [9]:
import pandas as pd
import numpy as np
# Load the CSV files
book_id_map_df = pd.read_csv("./data/book_id_map.csv")
book_works_df = pd.read_csv("./data/book_works.csv")

book_id_map = dict(zip(book_id_map_df['book_id_csv'], book_id_map_df['book_id']))

def get_work_id(book_id):
    return book_id_map[book_id]

# Define the function to get original title by work_id
def get_original_title_by_book_id(work_id, book_works_df):
    # Find the row with the matching best_book_id
    match = book_works_df[book_works_df['best_book_id'] == work_id]
    
    # If a match is found, return the original title
    if not match.empty:
        return match['original_title'].values[0]
    else:
        return None

## User - User CF

In [115]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

def normalize_ratings(user_item_matrix):
    user_means = user_item_matrix.mean(axis=1)
    normalized_matrix = (user_item_matrix.T - user_means).T
    return normalized_matrix, user_means

def pearson_similarity(user_item_matrix):
    user_similarity = user_item_matrix.T.corr(method='pearson')
    return user_similarity

def get_user_based_recommendations(interactions, new_user_ratings, n=10):
    """
    Generate user-based collaborative filtering recommendations for a new user.

    Parameters:
    - interactions (pd.DataFrame): Existing user-item interactions DataFrame.
    - new_user_ratings (pd.DataFrame): New user's ratings DataFrame with columns ['user_id', 'book_id', 'rating'].
    - n (int): Number of recommendations to generate.

    Returns:
    - list: Top-N recommended book IDs for the new user.
    """

    def predict_ratings(user_item_matrix, user_similarity_df, user_means):
        ratings_diff = user_item_matrix
        pred = user_means[:, np.newaxis] + user_similarity_df.dot(ratings_diff) / np.array([np.abs(user_similarity_df).sum(axis=1)]).T
        return pred

    def get_top_n_recommendations(predicted_ratings_df, user_id, n=10):
        user_pred_ratings = predicted_ratings_df.loc[user_id]
        user_pred_ratings_sorted = user_pred_ratings.sort_values(ascending=False)
        top_n_books = user_pred_ratings_sorted.index[:n]
        top_n_scores = user_pred_ratings_sorted.values[:n]
        return top_n_books, top_n_scores

    # Append the new user's ratings to the interactions DataFrame
    updated_interactions = pd.concat([interactions, new_user_ratings], ignore_index=True)

    # Create the updated user-item interaction matrix
    updated_user_item_matrix = updated_interactions.pivot(index='user_id', columns='book_id', values='rating')
    updated_user_item_matrix.fillna(0, inplace=True)
    
    # Normalize the ratings
    normalized_matrix, user_means = normalize_ratings(updated_user_item_matrix)

    # Compute the updated user similarity matrix
    # Compute the user similarity matrix using Pearson correlation
    user_similarity = cosine_similarity(normalized_matrix)
    user_similarity_df = pd.DataFrame(user_similarity, index=updated_user_item_matrix.index, columns=updated_user_item_matrix.index)
    
    # Predict ratings for the updated user-item matrix
    predicted_ratings = predict_ratings(normalized_matrix.values, user_similarity_df, user_means.values)
    predicted_ratings_df = pd.DataFrame(predicted_ratings, index=updated_user_item_matrix.index, columns=updated_user_item_matrix.columns)
    
    # Get top-N recommendations for the new user
    new_user_id = new_user_ratings['user_id'].iloc[0]
    top_n_recommendations, top_n_scores = get_top_n_recommendations(predicted_ratings_df, new_user_id, n)

    return top_n_recommendations.tolist(), top_n_scores.tolist()

In [183]:
# New user's ratings DataFrame
new_user_ratings = pd.DataFrame({
    'user_id': 999999999,  # Replace with the new user's ID
    'book_id': [7300, 1201, 7057, 530615, 943],
    'rating': [3, 4, 2, 5, 1]
})

In [178]:
new_user_books = [7300, 1201, 7057, 530615, 943]

for book_id in new_user_books:
    work_id = get_work_id(book_id)
    print(get_original_title_by_book_id(work_id, book_works_df))

The Bell Jar
Little Women
Throne of Glass
The Sins of the Father
Harry Potter and the Philosopher's Stone


In [88]:
# Example usage:
# Existing interactions DataFrame
interactions = pd.read_csv("./data/interactions.csv")

# Get user-based CF recommendations
top_n_recommendations_user_based, top_n_scores_user_based = get_user_based_recommendations(interactions, new_user_ratings, n=30)
print(f"Top-10 user-based recommendations for the new user: {top_n_recommendations_user_based}")

Top-10 user-based recommendations for the new user: [1201, 19506, 7300, 943, 7057, 536, 941, 939, 613, 938, 1387, 1386, 786, 1000, 944, 1574, 968, 524, 1012, 862, 1211, 1003, 670, 997, 1013, 7432, 1002, 7008, 821, 828]


## Item - Item CF

In [101]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

def get_item_based_recommendations(interactions, new_user_ratings, n=10):
    """
    Generate item-based collaborative filtering recommendations for a new user.

    Parameters:
    - interactions (pd.DataFrame): Existing user-item interactions DataFrame.
    - new_user_ratings (pd.DataFrame): New user's ratings DataFrame with columns ['user_id', 'book_id', 'rating'].
    - n (int): Number of recommendations to generate.

    Returns:
    - list: Top-N recommended book IDs for the new user.
    """

    def predict_ratings_item_based(user_item_matrix, item_similarity_df):
        mean_user_rating = user_item_matrix.mean(axis=1)
        ratings_diff = user_item_matrix - mean_user_rating[:, np.newaxis]
        sum_sim = np.array([np.abs(item_similarity_df).sum(axis=1)])
        sum_sim[sum_sim == 0] = 1e-9  # To avoid division by zero
        pred = mean_user_rating[:, np.newaxis] + ratings_diff.dot(item_similarity_df) / sum_sim
        return pred

    def get_top_n_recommendations(predicted_ratings_df, user_id, n=10):
        user_pred_ratings = predicted_ratings_df.loc[user_id]
        user_pred_ratings_sorted = user_pred_ratings.sort_values(ascending=False)
        top_n_books = user_pred_ratings_sorted.index[:n]
        return top_n_books

    # Append the new user's ratings to the interactions DataFrame
    updated_interactions = pd.concat([interactions, new_user_ratings], ignore_index=True)

    # Create the updated user-item interaction matrix
    updated_user_item_matrix = updated_interactions.pivot(index='user_id', columns='book_id', values='rating')
    updated_user_item_matrix.fillna(0, inplace=True)

    # Compute the updated item similarity matrix
    updated_item_similarity = cosine_similarity(updated_user_item_matrix.T)
    updated_item_similarity_df = pd.DataFrame(updated_item_similarity, index=updated_user_item_matrix.columns, columns=updated_user_item_matrix.columns)

    # Predict ratings for the updated user-item matrix
    updated_predicted_ratings_item_based = predict_ratings_item_based(updated_user_item_matrix.values, updated_item_similarity_df.values)
    updated_predicted_ratings_item_based_df = pd.DataFrame(updated_predicted_ratings_item_based, index=updated_user_item_matrix.index, columns=updated_user_item_matrix.columns)

    # Get top-N recommendations for the new user
    new_user_id = new_user_ratings['user_id'].iloc[0]
    top_n_recommendations = get_top_n_recommendations(updated_predicted_ratings_item_based_df, new_user_id, n)

    return top_n_recommendations.tolist()

# Example usage:
# Existing interactions DataFrame
interactions = pd.read_csv("./data/interactions.csv")

# Get item-based CF recommendations
top_n_recommendations_item_based = get_item_based_recommendations(interactions, new_user_ratings, n=30)
print(f"Top-10 item-based recommendations for the new user: {top_n_recommendations_item_based}")

Top-10 item-based recommendations for the new user: [530615, 15455, 15428, 15429, 15437, 15441, 15449, 15473, 15456, 15467, 15459, 15460, 15461, 15463, 15464, 15465, 15426, 15476, 15477, 7892, 7895, 13533, 7246, 7204, 12894, 15466, 6888, 6889, 6890, 6891]


In [85]:
new_user_books = [7300, 1201, 7057, 530615, 943]

for book_id in new_user_books:
    work_id = get_work_id(book_id)
    print(get_original_title_by_book_id(work_id, book_works_df))

The Bell Jar
Little Women
Throne of Glass
The Sins of the Father
Harry Potter and the Philosopher's Stone


### user-user collaborative filtering results

In [95]:
recommended_books = top_n_recommendations_user_based

count = 0
for book_id in recommended_books:
    work_id = get_work_id(book_id)
    title = get_original_title_by_book_id(work_id, book_works_df)
    if title == None or title == "nan" :
        count+=1
        continue
    print(title)
    count+=1

Little Women
nan
The Bell Jar
Harry Potter and the Philosopher's Stone
Throne of Glass
The Hunger Games
Harry Potter and the Prisoner of Azkaban
Harry Potter and the Order of the Phoenix
Harry Potter and the Deathly Hallows
Harry Potter and the Goblet of Fire
Catching Fire
Mockingjay
To Kill a Mockingbird
Twilight
Harry Potter and the Half-Blood Prince
The Giver
Harry Potter and the Chamber of Secrets
Man som hatar kvinnor
Pride and Prejudice
The Catcher in the Rye
Water for Elephants
The Fellowship of the Ring
The Lion, the Witch and the Wardrobe
The Kite Runner
Gone with the Wind (1 vol.)
Jane Eyre
The Lightning Thief
Nineteen Eighty-Four
Where the Sidewalk Ends: The Poems and Drawings of Shel Silverstein


## item-item collaborative filtering

In [102]:
recommended_books = top_n_recommendations_item_based

for book_id in recommended_books:
    work_id = get_work_id(book_id)
    title = get_original_title_by_book_id(work_id, book_works_df)
    if title == None or title == "nan" :
        continue
    print(title)

The Sins of the Father
Presumed Innocent
The Third Twin
Many Lives, Many Masters
109 East Palace: Robert Oppenheimer and the Secret City of Los Alamos
Me & Emma
The Forgotten Garden
A Woman of Substance
Blink: The Power of Thinking Without Thinking
Dark Tide: The Great Boston Molasses Flood of 1919
Crooked Letter, Crooked Letter
The Faith Club: A Muslim, A Christian, A Jew--Three Women Search for Understanding
One Day
Absolute Power
The General's Daughter
The Widow's War
Black and Blue
Where the Heart Is
The Brethren
The Testament
A Reliable Wife
Dare Me
Tell No One
The Thirteenth Tale
Mountains Beyond Mountains: The Quest of Dr. Paul Farmer, a Man Who Would Cure the World
nan
nan
Hopeless Magic
Reckless Magic (Star-Crossed #1)
