#Imports

In [None]:
import scipy
import math
import sklearn
from sklearn.model_selection import train_test_split
from scipy.sparse.linalg import svds
import pandas as pd
import math
import numpy as np
import random
from sklearn.metrics import mean_squared_error
import numpy as np
import pandas as pd
from scipy.sparse.linalg import svds
from scipy.sparse import csr_matrix


#Split into train and test

In [None]:
import math
import pandas as pd
from sklearn.model_selection import train_test_split

def get_train_test_split(dataframe, min_interactions=3, test_size=0.30, random_state=42):
    """Filter out low-activity users, smooth the ratings and split train/test.

    Pipeline:
      1. Count how many distinct ISBNs each user has interacted with.
      2. Keep users with at least ``min_interactions`` distinct ISBNs.
      3. Sum ``Book-Rating`` per (ISBN, User-ID) pair and apply ``log2(1 + x)``.
      4. Split the smoothed pairs into train/test, stratified by ``User-ID``.

    Progress counts are printed at every step.

    Args:
        dataframe (DataFrame): Raw interactions with ``User-ID``, ``ISBN`` and
            ``Book-Rating`` columns.
        min_interactions (int): Minimum number of distinct ISBNs a user needs.
        test_size (float): Share of the smoothed interactions put in the test set.
        random_state (int): Seed forwarded to ``train_test_split``.

    Returns:
        tuple: ``(train_data, test_data, smoothed_interactions)``.
    """
    # One row per (ISBN, User-ID) pair, then the number of pairs per user.
    pair_counts = dataframe.groupby(['ISBN', 'User-ID']).size()
    books_per_user = pair_counts.groupby('User-ID').size()
    print(f'Total number of users: {len(books_per_user)}')

    # Single-column frame holding the ids of the users that pass the threshold.
    is_active = books_per_user >= min_interactions
    qualified_users = books_per_user[is_active].reset_index()[['User-ID']]
    print(f'Users with at least {min_interactions} interactions: {len(qualified_users)}')

    # A right merge keeps only the rows that belong to a qualified user.
    qualified_interactions = dataframe.merge(qualified_users, on='User-ID', how='right')
    print(f'Total interactions: {len(dataframe)}')
    print(f'Interactions from qualified users: {len(qualified_interactions)}')

    # Collapse repeated pairs by summing their ratings, then damp with log2(1 + x).
    rating_totals = qualified_interactions.groupby(['ISBN', 'User-ID'])['Book-Rating'].sum()
    smoothed_interactions = rating_totals.apply(lambda total: math.log(1 + total, 2)).reset_index()
    print(f'Unique user/item interactions: {len(smoothed_interactions)}')

    # Stratifying on the user id spreads each user's rows over both partitions.
    train_data, test_data = train_test_split(
        smoothed_interactions,
        stratify=smoothed_interactions['User-ID'],
        test_size=test_size,
        random_state=random_state,
    )
    print(f'Interactions on Train set: {len(train_data)}')
    print(f'Interactions on Test set: {len(test_data)}')

    return train_data, test_data, smoothed_interactions


#SVD

In [None]:

def matrix_factorization_predictions(train_df, num_factors=15):
    """Performs matrix factorization using SVD on the user-item ratings matrix from training data.

    The training interactions are pivoted into a users x items matrix (missing
    pairs filled with 0), factorised with a truncated SVD, reconstructed, and
    min-max scaled to the range 0-10.

    Args:
        train_df (DataFrame): Training data with ``User-ID``, ``ISBN`` and
            ``Book-Rating`` columns, one row per (user, item) pair.
        num_factors (int): Requested number of latent factors. It is reduced to
            ``min(n_users, n_items) - 1`` when larger, because ``svds`` rejects
            anything above that.

    Returns:
        DataFrame: Predicted ratings with ISBNs as the index and user ids as
        the columns.

    Raises:
        ValueError: If ``num_factors`` is below 1, or the pivoted matrix has
            fewer than two users or two items.
    """
    if num_factors < 1:
        raise ValueError(f"num_factors must be at least 1, got {num_factors}")

    # Create pivot table
    users_items_pivot_matrix_df = train_df.pivot(index='User-ID', columns='ISBN', values='Book-Rating').fillna(0)

    # svds requires k < min(matrix.shape); clamp instead of failing on small data.
    max_factors = min(users_items_pivot_matrix_df.shape) - 1
    if max_factors < 1:
        raise ValueError(
            "Need at least 2 users and 2 items to factorise, got a matrix of shape "
            f"{users_items_pivot_matrix_df.shape}"
        )
    effective_factors = min(int(num_factors), max_factors)

    # Convert the pivot table to a sparse matrix format (svds needs a float dtype)
    users_items_pivot_matrix = csr_matrix(users_items_pivot_matrix_df.values.astype(float))

    # Perform matrix factorization using SVD
    U, sigma, Vt = svds(users_items_pivot_matrix, k=effective_factors)

    # Predict ratings: scaling U's columns by sigma equals U @ diag(sigma)
    all_user_predicted_ratings = np.dot(U * sigma, Vt)

    # Normalize the ratings to be between 0 and 10
    min_val = np.min(all_user_predicted_ratings)
    value_range = np.max(all_user_predicted_ratings) - min_val
    if np.isclose(value_range, 0.0):
        # All predictions are (numerically) equal; avoid a 0/0 that would yield NaN.
        all_user_predicted_ratings_norm = np.zeros_like(all_user_predicted_ratings)
    else:
        all_user_predicted_ratings_norm = ((all_user_predicted_ratings - min_val) / value_range) * 10

    # Convert the matrix back to a DataFrame (items as rows, users as columns)
    cf_preds_df = pd.DataFrame(
        all_user_predicted_ratings_norm,
        columns=users_items_pivot_matrix_df.columns,
        index=users_items_pivot_matrix_df.index,
    ).transpose()

    return cf_preds_df



#User based Collaborative Filtering using Matrix factorization

In [None]:

# CFRecommender wraps the predicted-ratings matrix and exposes two operations:
# top-N recommendations for a user, and a rating lookup for a single (user, item) pair.

class CFRecommender:
    """Collaborative-filtering recommender backed by a predicted-ratings table.

    ``cf_predictions_df`` is expected to have one column per user id and an
    index named ``ISBN`` holding the item ids.
    """

    MODEL_NAME = 'Collaborative Filtering'

    def __init__(self, cf_predictions_df):
        self.cf_predictions_df = cf_predictions_df

    def get_model_name(self):
        """Return the display name of this model."""
        return self.MODEL_NAME

    def recommend_items(self, user_id, items_to_ignore=[], topn=10):
        """Return the ``topn`` highest-scored items for ``user_id``.

        Args:
            user_id: Column label of the user in the predictions table.
            items_to_ignore: ISBNs to leave out of the result.
            topn (int): Maximum number of rows to return.

        Returns:
            DataFrame: Columns ``ISBN`` and ``recStrength``, best first.
        """
        # The user's predicted scores, best first, as an (ISBN, recStrength) frame.
        ranked = self.cf_predictions_df[user_id].sort_values(ascending=False).reset_index()
        ranked = ranked.rename(columns={user_id: 'recStrength'})

        # Drop the ignored items, then order again before truncating.
        ignored_mask = ranked['ISBN'].isin(items_to_ignore)
        remaining = ranked[~ignored_mask]
        return remaining.sort_values('recStrength', ascending=False).head(topn)

    def predict_rating(self, user_id, item_id):
        """Return the predicted rating for one (user, item) pair, or NaN if unknown."""
        predictions = self.cf_predictions_df
        if user_id not in predictions.columns or item_id not in predictions.index:
            # Either the user or the item is missing from the predictions table.
            return np.nan
        return predictions.loc[item_id, user_id]


In [None]:
class ModelRecommender:
    """Produces mood-filtered book recommendations from a recommender model.

    Holds the full / test / train interaction tables (looked up by user id via
    ``.loc``, so they must be indexed by user id) and the de-duplicated book
    table that supplies the ``ISBN``, ``Max Mood`` and ``Book`` columns.
    """

    # Large enough to make the model return its complete ranking before filtering.
    _ALL_ITEMS_TOPN = 10_000_000_000

    def __init__(self, interactions_full_indexed_df, interactions_test_indexed_df, interactions_train_indexed_df, ratings_df_unique):
        self.interactions_full_indexed_df = interactions_full_indexed_df
        self.interactions_test_indexed_df = interactions_test_indexed_df
        self.interactions_train_indexed_df = interactions_train_indexed_df
        self.ratings_df_unique = ratings_df_unique

    @staticmethod
    def get_items_interacted(UserID, interactions_df):
        """Return the set of ISBNs that ``UserID`` has in ``interactions_df``.

        ``interactions_df`` must be indexed by user id. A user with a single
        row yields a scalar from ``.loc``; that case is wrapped into a
        one-element set.

        Raises:
            KeyError: If ``UserID`` is not in the index.
        """
        interacted_items = interactions_df.loc[UserID]['ISBN']
        if isinstance(interacted_items, pd.Series):
            return set(interacted_items)
        return {interacted_items}

    def get_not_interacted_items_sample(self, UserID, sample_size, seed=42):
        """Return a reproducible random sample of ISBNs the user has not interacted with.

        Args:
            UserID: User id present in the full interactions table.
            sample_size (int): Number of ISBNs to draw.
            seed (int): Seed for the private random generator used for the draw.

        Returns:
            set: ``sample_size`` ISBNs taken from the book table, none of which
            appear among the user's interactions.

        Raises:
            ValueError: If ``sample_size`` exceeds the number of candidates.
        """
        interacted_items = self.get_items_interacted(UserID, self.interactions_full_indexed_df)
        all_items = set(self.ratings_df_unique['ISBN'])
        non_interacted_items = all_items - interacted_items

        # random.sample no longer accepts sets (Python 3.11+); sorting also makes
        # the draw independent of set iteration order. A private Random instance
        # keeps the global random state untouched.
        candidates = sorted(non_interacted_items, key=str)
        rng = random.Random(seed)
        return set(rng.sample(candidates, sample_size))

    def _verify_hit_top_n(self, item_id, recommended_items, topn):
        """Check whether ``item_id`` is among the first ``topn`` recommended items.

        Returns:
            tuple: ``(hit, index)`` where ``index`` is the position of the first
            match in ``recommended_items`` (-1 when absent) and ``hit`` is 1
            when ``0 <= index < topn``, else 0.
        """
        index = next((i for i, c in enumerate(recommended_items) if c == item_id), -1)
        hit = int(0 <= index < topn)
        return hit, index

    def evaluate_model_for_user(self, model, person_id, mood, topn=5):
        """Return the best recommendations for ``person_id`` whose mood matches ``mood``.

        The held-out test interactions are not consulted, so a user without
        test rows still gets recommendations.

        Args:
            model: Object with ``recommend_items(user_id, items_to_ignore, topn)``
                returning a DataFrame that has an ``ISBN`` column.
            person_id: User id understood by ``model``.
            mood (str): Pattern searched for in ``Max Mood`` with
                ``Series.str.contains`` (treated as a regular expression by pandas).
            topn (int): Maximum number of rows to return.

        Returns:
            DataFrame: The model's ranking joined with ``Max Mood`` and ``Book``,
            restricted to matching moods and truncated to ``topn`` rows.
        """
        # Getting a ranked recommendation list from the model for a given user
        person_recs_df = model.recommend_items(person_id, items_to_ignore=[], topn=self._ALL_ITEMS_TOPN)

        # Attach mood / title metadata; a left join keeps the model's ranking order.
        book_metadata = self.ratings_df_unique[['ISBN', 'Max Mood', 'Book']]
        updated_person_recs_df = person_recs_df.merge(book_metadata, on='ISBN', how='left')

        # Books without mood metadata (NaN) never match.
        mood_matches = updated_person_recs_df['Max Mood'].str.contains(mood, na=False)
        return updated_person_recs_df[mood_matches].head(topn)

    def recommend_book(self, model, userid, mood, topn=5):
        """Public entry point: mood-filtered top recommendations for ``userid``."""
        return self.evaluate_model_for_user(model, userid, mood, topn=topn)

#model_recommender = ModelRecommender()

#RMSE

In [None]:
def calculate_rmse(test_df, cf_recommender_model):
  """Compute, print and return the RMSE of the model on the test interactions.

  Side effect: a ``predicted_rating`` column is added to ``test_df`` in place.

  Args:
      test_df (DataFrame): Test interactions with ``User-ID``, ``ISBN`` and
          ``Book-Rating`` columns.
      cf_recommender_model: Object exposing ``predict_rating(user, item)``.

  Returns:
      float: Root mean squared error between ``Book-Rating`` and the
      predictions, with missing (NaN) predictions counted as 0.
  """
  # Predict a rating for every user-item pair in the test set
  predicted_ratings = [
      cf_recommender_model.predict_rating(user, item)
      for user, item in zip(test_df['User-ID'], test_df['ISBN'])
  ]

  # Add these predictions back to the test dataframe
  test_df['predicted_rating'] = predicted_ratings

  # NOTE(review): in this notebook the predictions are min-max scaled to 0-10 while
  # Book-Rating is log2-smoothed, so the two sides are on different scales - confirm
  # that this RMSE is the intended metric.
  # NaN predictions (user or item unknown to the model) are scored as 0.
  rmse = np.sqrt(mean_squared_error(test_df['Book-Rating'], test_df['predicted_rating'].fillna(0)))
  print(f"RMSE: {rmse}")
  return rmse

#BUILD model

In [None]:
# Method to build the model.
def build_model(ratings_path="/content/baseline_ratinsg.csv"):
  """Load the ratings, fit the SVD recommender and report its RMSE.

  Args:
      ratings_path: Location of the ratings CSV. It must provide ``user_id``,
          ``isbn`` and ``book_rating`` columns (renamed here to ``User-ID``,
          ``ISBN`` and ``Book-Rating``).

  Returns:
      tuple: ``(model_recommender, cf_recommender_model, test_df, train_df)``.
  """
  ratings_df = pd.read_csv(ratings_path).rename(
      columns={'user_id': 'User-ID', 'isbn': 'ISBN', 'book_rating': 'Book-Rating'}
  )
  # One row per ISBN, used later to look up per-book metadata.
  ratings_df_unique = ratings_df.drop_duplicates(subset='ISBN')

  train_df, test_df, interactions_full_df = get_train_test_split(ratings_df)
  print(f'Interactions on Train set: {len(train_df)}')
  print(f'Interactions on Test set: {len(test_df)}')

  cf_preds_df = matrix_factorization_predictions(train_df)

  # The recommender looks interactions up by user id, so index on it.
  interactions_full_indexed_df = interactions_full_df.set_index('User-ID')
  interactions_train_indexed_df = train_df.set_index('User-ID')
  interactions_test_indexed_df = test_df.set_index('User-ID')

  cf_recommender_model = CFRecommender(cf_preds_df)
  model_recommender = ModelRecommender(
      interactions_full_indexed_df,
      interactions_test_indexed_df,
      interactions_train_indexed_df,
      ratings_df_unique,
  )
  calculate_rmse(test_df, cf_recommender_model)
  return model_recommender, cf_recommender_model, test_df, train_df

In [None]:
# Build everything once; recommend_books_based_on_mood reads model_recommender
# and cf_recommender_model from module scope.
(
    model_recommender,
    cf_recommender_model,
    test_df,
    train_df,
) = build_model()


Total number of users: 16795
Users with at least 3 interactions: 3451
Total interactions: 46112
Interactions from qualified users: 30359
Unique user/item interactions: 30359
Interactions on Train set: 21251
Interactions on Test set: 9108
Interactions on Train set: 21251
Interactions on Test set: 9108
RMSE: 1.160720057494315


In [None]:
def recommend_books_based_on_mood(mood, user_id):
  """Return the ISBNs of the top mood-matched recommendations for a user.

  Relies on the module-level ``model_recommender`` and ``cf_recommender_model``.
  The recommendation table is also printed.

  Args:
      mood (str): Mood to match against each book's ``Max Mood`` value.
      user_id: Id of the user to recommend for.

  Returns:
      list[str]: ISBNs left-padded with zeros to 10 characters.
  """
  recommendations_df = model_recommender.recommend_book(cf_recommender_model, user_id, mood)
  print(recommendations_df)
  # str() first: ISBNs parsed as integers have no zfill and have lost their leading zeros.
  return [str(isbn).zfill(10) for isbn in recommendations_df['ISBN']]

In [None]:

# Example: mood-matched recommendations for a single user.
print(recommend_books_based_on_mood('Motivational', 148744))

          ISBN  recStrength      Max Mood                       Book
26   446610399     4.528759  Motivational                 the rescue
86   446356832     4.302083  Motivational          the sands of time
136  553211684     4.229585  Motivational  tess of the d'urbervilles
143  743211383     4.222939  Motivational               dreamcatcher
148  451168364     4.221412  Motivational          nectar in a sieve
['0446610399', '0446356832', '0553211684', '0743211383', '0451168364']
