In [15]:
import sys
import os

# Make the project's `src` directory importable so that local packages
# (e.g. `hybrid.recommend_hybrid`) resolve. The path is relative to the
# notebook's working directory. The membership check keeps the cell
# idempotent: re-running it no longer appends a duplicate entry to sys.path.
SRC_DIR = os.path.abspath("../src")
if SRC_DIR not in sys.path:
    sys.path.append(SRC_DIR)

In [16]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer

from hybrid.recommend_hybrid import recommend_hybrid

from surprise import Dataset, Reader, NMF

In [17]:
# Load the processed ratings, the book metadata, and the top-books table
# (ranked by count, per the file name).
# ISBN is forced to text at read time: with dtype inference, all-digit ISBNs
# can be parsed as integers, which silently drops leading zeros and makes
# ISBN lookups across the three frames miss.
ratings_df = pd.read_csv("../data/processed/ratings_final.csv", dtype={"ISBN": str})
books_df = pd.read_csv("../data/processed/books_final.csv", dtype={"ISBN": str})
top_books_df = pd.read_csv("../outputs/top_books_by_count.csv", dtype={"ISBN": str})

In [18]:
# Normalise the ISBN key the same way in every frame (cast to string, trim
# surrounding whitespace) so that ISBN values compare equal across frames.
for frame in (ratings_df, books_df, top_books_df):
    frame['ISBN'] = frame['ISBN'].astype(str).str.strip()

In [19]:
# Collaborative-filtering model: NMF with 20 latent factors, fitted on the
# full ratings table (no hold-out split). Ratings are declared to the
# Reader as lying on a 1-10 scale; the seed keeps the factorisation
# reproducible.
cf_columns = ["User-ID", "ISBN", "Rating"]
reader = Reader(rating_scale=(1, 10))
data = Dataset.load_from_df(ratings_df[cf_columns], reader)
trainset = data.build_full_trainset()

nmf = NMF(n_factors=20, random_state=42)
model_cf = nmf.fit(trainset)

In [20]:
# Build one free-text field per book from its title, author and publisher.
# Missing values are replaced by empty strings first so that a single
# missing field does not turn the whole concatenation into NaN.
title_text, author_text, publisher_text = (
    books_df[column].fillna("") for column in ("Title", "Author", "Publisher")
)
books_df["text_features"] = title_text + " " + author_text + " " + publisher_text

# TF-IDF over unigrams and bigrams, with English stop words removed and the
# vocabulary capped at 5000 features.
tfidf = TfidfVectorizer(
    max_features=5000,
    ngram_range=(1, 2),
    stop_words="english",
)
tfidf_matrix = tfidf.fit_transform(books_df["text_features"])

# Lookup from ISBN to the corresponding row label of books_df; handed to
# recommend_hybrid as `indices`.
indices = pd.Series(data=books_df.index, index=books_df["ISBN"])

In [60]:
# Users to generate recommendations for.
# The first entry is drawn from the ratings table with a fixed seed so that
# "Restart Kernel -> Run All" always shows the same user and output; change
# the seed to look at a different user.
# Note: the draw is over rating rows, so users with more ratings are more
# likely to be picked.
# The commented-out IDs are alternative test profiles; uncomment to run them.
user_ids = [
    ratings_df['User-ID'].sample(1, random_state=42).iloc[0],
    # 1234567890,  # imaginary user for popularity recommendations
    # 39400,       # user for CBF
]

# Run the hybrid recommender for each selected user and render the result.
for uid in user_ids:
    print(f"\n===== Recommendations for User {uid} =====")
    recs = recommend_hybrid(
        user_id=uid,
        ratings_df=ratings_df,
        books_df=books_df,
        model_cf=model_cf,
        tfidf_matrix=tfidf_matrix,
        indices=indices,
        top_books_df=top_books_df,
        n=10
    )
    display(recs)


===== Recommendations for User 6251 =====


Unnamed: 0,User-ID,Title,Score,Source
0,6251,101 Dalmatians,10,CF
1,6251,Lady of Desire,10,CF
2,6251,Mariette in Ecstasy,10,CF
3,6251,Ben & Jerry's Homemade Ice Cream &amp; Dessert...,10,CF
4,6251,Les Heures / The Hours,10,CF
5,6251,Gone Too Far,10,CF
6,6251,The National Parks : A Postcard Folio Book,10,CF
7,6251,Deception on His Mind,10,CF
8,6251,The Crucible: A Play in Four Acts (Twentieth-C...,10,CF
9,6251,The Official Scrabble Players Dictionary (Thir...,10,CF


##  Hybrid Recommendation System Summary

This notebook demonstrates a modular hybrid recommender system that combines:

- **Collaborative Filtering (CF)** using NMF
- **Content-Based Filtering (CBF)** using TF-IDF + Cosine Similarity
- **Popularity-Based Recommendations** as fallback

Each model is encapsulated in its own module. The function `recommend_hybrid()` combines them by choosing one method per user according to how many ratings that user has (see the table below).

---

##  Logic Behind Hybrid Switching Strategy

| User Type         | Criteria                          | Recommendation Source |
|------------------|-----------------------------------|------------------------|
| Active User       | ≥ 10 ratings                      | CF (Collaborative Filtering) |
| Mid-level User    | 1–9 ratings                       | CBF (Content-Based) |
| New User          | 0 ratings                         | Popularity (Top-N) |

Fallback logic ensures recommendations are always returned.

---
##  Testing & Evaluation

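- The hybrid function was tested on several user profiles: a randomly sampled user, plus the two user IDs left commented out in the recommendation cell (an imaginary user for the popularity path and user 39400 for the CBF path)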
- Fallback behavior (CBF, Popularity) works as expected
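- Cold-start and matrix-sparsity issues are mitigated: users with no ratings receive popularity-based recommendations, and users with fewer than 10 ratings receive content-based recommendations instead of CF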
- Output includes `User-ID`, `Title`, `Score`, `Source`
