In [1]:
import sys
import os

# Make the sibling ../src directory importable (presumably where the `hybrid`
# package imported further down lives -- confirm against the repo layout).
# The path is resolved relative to the kernel's current working directory.
src_dir = os.path.abspath("../src")

# Guard the append so re-running this cell does not pile up duplicate
# entries on sys.path.
if src_dir not in sys.path:
    sys.path.append(src_dir)

In [2]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer

from hybrid.recommend_hybrid import recommend_hybrid

from surprise import Dataset, Reader, NMF

In [3]:
# Load the three input tables used by the rest of the notebook.
#
# ISBN is forced to `str` at parse time. Without an explicit dtype, pandas
# infers int64 for a column whose values are all digits, which silently drops
# leading zeros (e.g. "0440234743" -> 440234743). Casting back with
# `.astype(str)` afterwards cannot restore them, so ISBNs would stop matching
# across the three frames.
ISBN_AS_TEXT = {"ISBN": str}

ratings_df = pd.read_csv("../data/processed/ratings_final.csv", dtype=ISBN_AS_TEXT)
books_df = pd.read_csv("../data/processed/books_final.csv", dtype=ISBN_AS_TEXT)
top_books_df = pd.read_csv("../outputs/top_books_by_count.csv", dtype=ISBN_AS_TEXT)

In [4]:
# Bring the ISBN key into one canonical form (string, no surrounding
# whitespace) in every frame so lookups and joins on ISBN line up.
for isbn_frame in (ratings_df, books_df, top_books_df):
    isbn_frame['ISBN'] = isbn_frame['ISBN'].astype(str).str.strip()

In [5]:
# Collaborative-filtering component: an NMF model from `surprise`, fitted on
# every available rating (full trainset, no hold-out split).
N_LATENT_FACTORS = 20
CF_RANDOM_STATE = 42  # fixed seed so the factorisation is repeatable

reader = Reader(rating_scale=(1, 10))

# Surprise expects exactly three columns, in (user, item, rating) order.
rating_triples = ratings_df[["User-ID", "ISBN", "Rating"]]
data = Dataset.load_from_df(rating_triples, reader)
trainset = data.build_full_trainset()

nmf_algo = NMF(n_factors=N_LATENT_FACTORS, random_state=CF_RANDOM_STATE)
model_cf = nmf_algo.fit(trainset)

In [6]:
# Content-based component: one text blob per book (title, author, publisher),
# vectorised with TF-IDF.
# Missing values become empty strings so the concatenation never yields NaN.
title_text = books_df["Title"].fillna("")
author_text = books_df["Author"].fillna("")
publisher_text = books_df["Publisher"].fillna("")
books_df["text_features"] = title_text + " " + author_text + " " + publisher_text

# Unigrams and bigrams, English stop words removed, vocabulary capped at 5000 terms.
tfidf = TfidfVectorizer(
    stop_words="english",
    ngram_range=(1, 2),
    max_features=5000,
)
tfidf_matrix = tfidf.fit_transform(books_df["text_features"])

# ISBN -> row label of books_df. The rows of tfidf_matrix follow the row
# order of books_df.
# NOTE(review): if books_df holds duplicate ISBNs, `indices[isbn]` returns a
# Series rather than a scalar -- confirm books_final.csv is deduplicated.
isbn_keys = books_df["ISBN"]
indices = pd.Series(books_df.index, index=isbn_keys)

In [None]:
# Users to generate recommendations for.
#
# The list used to be empty (every entry was commented out), so the loop
# below never ran and a fresh "Restart & Run All" produced no output at all.
# User 39400 is enabled because it is the user shown in the saved output of
# this cell, where every row has Source == "CBF".
CBF_DEMO_USER_ID = 39400

user_ids = [
    CBF_DEMO_USER_ID,
    # Other candidates noted by the author; uncomment to try them:
    # 1234567890,  # imaginary user, noted as exercising popularity recommendations
    # ratings_df["User-ID"].sample(1, random_state=42).iloc[0],  # a random known user (seeded)
]

for uid in user_ids:
    print(f"\n===== Recommendations for User {uid} =====")
    # Ask the hybrid recommender for the top 10 books for this user, passing
    # it the fitted CF model, the TF-IDF matrix with its ISBN lookup, and the
    # top-books table.
    recs = recommend_hybrid(
        user_id=uid,
        ratings_df=ratings_df,
        books_df=books_df,
        model_cf=model_cf,
        tfidf_matrix=tfidf_matrix,
        indices=indices,
        top_books_df=top_books_df,
        n=10
    )
    # Rich (HTML) rendering of the result; `display` is provided by IPython.
    display(recs)


===== Recommendations for User 39400 =====


Unnamed: 0,User-ID,Title,Score,Source
8155,39400,Ripley's Game (Vintage Crime/Black Lizard),0.931671,CBF
9055,39400,Ripley Under Water (Vintage Crime/Black Lizard),0.930162,CBF
3848,39400,The Thin Man (Vintage Crime/Black Lizard),0.850586,CBF
5563,39400,Fletch (Vintage Crime/Black Lizard),0.835191,CBF
2063,39400,The Maltese Falcon (Vintage Crime/Black Lizard),0.80043,CBF
6501,39400,Dead Lagoon: An Aurelio Zen Mystery (Vintage C...,0.764768,CBF
12258,39400,A Coffin for Dimitrios (Vintage Crime/Black Li...,0.756763,CBF
10732,39400,Pop. 1280 (Vintage Crime/Black Lizard),0.754447,CBF
5127,39400,"Farewell, My Lovely (Vintage Crime/Black Lizard)",0.74785,CBF
9776,39400,When I Was Puerto Rican,0.490502,CBF
