In [4]:
# notebooks/MovieRecommender.ipynb

import sys
sys.path.append('..')

import pandas as pd
import numpy as np
from utils.data_loader import load_data
from utils.popularity import get_top_popular_movies, define_popularity
from utils.similarity import center_rows, compute_similarity
from utils.ibcf import myIBCF
from utils.constants import MIN_COMMON_RATINGS, TOP_K

# 1. Load Data
# load_data() is a project helper (utils.data_loader); its three return values are bound
# here as `movies`, `users` and `R_df`. Later code in this cell treats R_df as a pandas
# DataFrame whose row labels are user ids such as "u1181" and whose column labels are
# movie ids such as "m260".
# NOTE(review): file locations and exact schemas live inside load_data — confirm there.
movies, users, R_df = load_data()

# 2. System I: Popularity-based Recommendation
# Ask the project helper for the 10 most popular movies and render the result with the
# notebook's rich display. How "popular" is scored is decided inside
# utils.popularity.get_top_popular_movies, not in this cell.
top_10_popular = get_top_popular_movies(R_df, movies, top_n=10)
display(top_10_popular)

# 3. System II: IBCF
# Drop the DataFrame labels and work on the raw array; rows/columns of R_matrix keep the
# same order as R_df.index / R_df.columns.
R_matrix = R_df.values
# center_rows returns two objects; judging by the names these are the centred matrix and
# the per-row means that were subtracted — TODO confirm against utils.similarity.
# `row_means` is not used again in this cell.
R_centered, row_means = center_rows(R_matrix)

# Similarity matrix built from the centred ratings. Later code indexes it as S[i, :] and
# labels that slice with R_df.columns, so S is expected to have one row and one column
# per movie, in R_df column order.
S = compute_similarity(R_centered)

# Display S for specified movies
# For each spot-check movie, show the first 10 non-missing similarities of its row in S.
# The row position is resolved from the column labels of R_df instead of being computed
# from the digits in the movie id: "position == id - 1" only holds while the columns are
# exactly m1..mN with no gaps and in numeric order, and silently picks the wrong row
# otherwise. get_loc raises KeyError if a requested movie is not a column of R_df.
# Assumes the column labels are unique, so get_loc returns a single integer position.
movies_to_display = ["m1", "m10", "m100", "m1510", "m260", "m3212"]
for mv in movies_to_display:
    idx = R_df.columns.get_loc(mv)
    # Label the row with movie ids so the output is readable; round to 7 decimals.
    sim_row = pd.Series(S[idx, :], index=R_df.columns).round(7)
    # Display non-NA similarities (just a sample); a movie whose row is entirely NaN
    # shows up as an empty Series.
    display(mv, sim_row.dropna().head(10))

# Save S
# Persist the full similarity matrix in NumPy's .npy format; the path is relative to the
# notebook's working directory.
np.save('../data/similarity_matrix.npy', S)

# Popularity ranking produced by the project helper; it is passed to myIBCF below.
# NOTE(review): presumably used as a fallback when IBCF cannot fill all slots — confirm
# in utils.ibcf.
popularity_ranking = define_popularity(R_df)

# Test myIBCF function
# For user "u1181": take that user's existing rating row (NaN where unrated, as stored in
# R_df) as the input vector.
test_user_vector = R_df.loc["u1181"].values
recommended_user1181 = myIBCF(test_user_vector, S, R_df, popularity_ranking)
print("Top 10 recommendations for user u1181:", recommended_user1181)

# Hypothetical user: rates "m1613"=5, "m1755"=4, others NA
# The ratings are declared by movie label and written at the position pandas reports for
# that label, rather than at hard-coded offsets (1612, 1754). The offsets are only right
# while R_df's columns are exactly m1..mN in order; get_loc gives the same positions in
# that case and raises KeyError instead of mis-assigning when a movie is missing.
NUM_MOVIES = R_df.shape[1]
hypo_ratings = {"m1613": 5.0, "m1755": 4.0}
hypo_user = np.full(NUM_MOVIES, np.nan)
for movie_label, rating in hypo_ratings.items():
    hypo_user[R_df.columns.get_loc(movie_label)] = rating

recommended_hypo = myIBCF(hypo_user, S, R_df, popularity_ranking)
print("Top 10 recommendations for the hypothetical user:", recommended_hypo)


Unnamed: 0_level_0,Title,MovieID_str
MovieID,Unnamed: 1_level_1,Unnamed: 2_level_1
2858,American Beauty (1999),m2858
260,Star Wars: Episode IV - A New Hope (1977),m260
1196,Star Wars: Episode V - The Empire Strikes Back...,m1196
1210,Star Wars: Episode VI - Return of the Jedi (1983),m1210
2028,Saving Private Ryan (1998),m2028
1198,Raiders of the Lost Ark (1981),m1198
593,"Silence of the Lambs, The (1991)",m593
2571,"Matrix, The (1999)",m2571
2762,"Sixth Sense, The (1999)",m2762
589,Terminator 2: Judgment Day (1991),m589


'm1'

m150    0.608120
m260    0.641659
m318    0.643794
m356    0.618132
m364    0.627962
m457    0.616146
m527    0.624002
m588    0.636349
m589    0.604416
m593    0.613962
dtype: float64

'm10'

m163     0.559561
m165     0.573200
m349     0.552771
m380     0.575661
m457     0.551900
m733     0.563643
m780     0.564190
m1210    0.551426
m1291    0.562249
m1370    0.556389
dtype: float64

'm100'

m9      0.551868
m113    0.568705
m225    0.574356
m236    0.550385
m259    0.555411
m387    0.561983
m436    0.555419
m478    0.558482
m612    0.550377
m782    0.583883
dtype: float64

'm1510'

Series([], dtype: float64)

'm260'

m1      0.641659
m50     0.672096
m110    0.657400
m318    0.673709
m457    0.643910
m527    0.662275
m541    0.660568
m589    0.647639
m593    0.652069
m750    0.659269
dtype: float64

'm3212'

Series([], dtype: float64)

Top 10 recommendations for user u1181: ['m3338', 'm3415', 'm3013', 'm2514', 'm2585', 'm3780', 'm30', 'm1993', 'm1039', 'm1329']
Top 10 recommendations for the hypothetical user: ['m3900', 'm1507', 'm3566', 'm1401', 'm1399', 'm1310', 'm1202', 'm1063', 'm1044', 'm997']
