In [None]:
# notebooks/MovieRecommender.ipynb

import sys
sys.path.append('..')

import pandas as pd
import numpy as np
from utils.data_loader import load_data
from utils.popularity import get_top_popular_movies, define_popularity
from utils.similarity import center_rows, compute_similarity
from utils.ibcf import myIBCF
from utils.constants import NUM_MOVIES

# 1. Load Data
# load_data() returns three objects, unpacked here as the movies table, the
# users table and the rating matrix R_df.
# NOTE(review): R_df is presumably users x movies with NaN for unrated
# entries -- confirm against utils.data_loader.
movies, users, R_df = load_data()

# 2. System I: Popularity-based Recommendation
# Ask the popularity helper for the top 10 movies and render the result in
# the notebook. How "popular" is scored is decided inside utils.popularity,
# not in this cell.
top_10_popular = get_top_popular_movies(R_df, movies, top_n=10)
display(top_10_popular)

# 3. System II: IBCF
#    (a) Normalize rating matrix by centering each row.
# Work on the plain array behind R_df, so positions in R_matrix (and in
# everything derived from it) follow the row/column order of R_df.
R_matrix = R_df.values
# center_rows returns the centered matrix plus a second value kept as
# row_means (presumably the per-row means that were subtracted -- confirm in
# utils.similarity). row_means is not used again in the visible code.
R_centered, row_means = center_rows(R_matrix)

#    (b) Compute similarity
# S is indexed below as S[i, :] with one entry per column of R_df, i.e. it is
# treated as a movie-by-movie matrix in R_df's column order.
S = compute_similarity(R_centered)

# Display S for given movies: "m1", "m10", "m100", "m1510", "m260", "m3212"
movies_to_display = ["m1", "m10", "m100", "m1510", "m260", "m3212"]
for mv in movies_to_display:
    # Find the row of S by column label rather than by "movie id minus one":
    # S follows the column order of R_df, and that order only matches the
    # numeric ids if every id from 1..N is present and sorted. A label lookup
    # is correct either way and raises KeyError for an unknown movie instead
    # of silently showing another movie's similarities.
    idx = R_df.columns.get_loc(mv)
    # round to 7 decimal places and display non-NA values
    sim_row = pd.Series(S[idx, :], index=R_df.columns).round(7)
    display(mv, sim_row.dropna().head(10))  # show a sample of similarities

# Save S for future use
np.save('../data/similarity_matrix.npy', S)

# Prepare popularity ranking for fallback
# The ranking is passed to myIBCF below; how it is built is defined in
# utils.popularity.
popularity_ranking = define_popularity(R_df)

# 4. Test myIBCF function
# User "u1181" from rating matrix R
test_user_id = 1181
# Derive the row label from the id once, so the lookup and the printed
# message cannot drift apart when the test user is changed.
test_user_label = f"u{test_user_id}"
test_user_vector = R_df.loc[test_user_label].values

recommended_user1181 = myIBCF(test_user_vector, S, R_df, popularity_ranking)
print(f"Top 10 recommendations for user {test_user_label}:", recommended_user1181)

# Hypothetical user: rates "m1613"=5, "m1755"=4, others NA
hypo_ratings = {"m1613": 5.0, "m1755": 4.0}

# The vector has to line up entry-for-entry with the columns of R_df (S was
# computed from R_df.values), so it is sized from R_df itself and each rating
# is placed by column label. Using "movie id minus one" as the position is
# only right when the ids are contiguous and sorted; otherwise the ratings
# would land on different movies without any error.
hypo_user = np.full(len(R_df.columns), np.nan)
for movie_label, rating in hypo_ratings.items():
    hypo_user[R_df.columns.get_loc(movie_label)] = rating

recommended_hypo = myIBCF(hypo_user, S, R_df, popularity_ranking)
print("Top 10 recommendations for the hypothetical user:", recommended_hypo)
