In [4]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity



In [6]:
# ---------- 1. LOAD DATA (paths already fixed & encoding handled) ----------
ratings = pd.read_csv(
    r"C:\Users\Harisiddarth\Downloads\ml-1m\ratings.dat",
    sep="::", engine="python",
    names=["userId", "movieId", "rating", "timestamp"]
)



In [7]:
movies = pd.read_csv(
    r"C:\Users\Harisiddarth\Downloads\ml-1m\movies.dat",
    sep="::", engine="python", encoding="latin-1",
    names=["movieId", "title", "genres"]
)


In [8]:
# ---------- 2. BUILD MATRICES ----------
ui = ratings.pivot_table(index="userId", columns="movieId", values="rating")
# keep NaN (missing) for predictions; only fill zeros to compute similarity
ui_filled = ui.fillna(0)

similarity = cosine_similarity(ui_filled)
sim_df = pd.DataFrame(similarity, index=ui.index, columns=ui.index)



In [9]:
# ---------- 3. RECOMMENDATION FUNCTION ----------
def recommend_for(user_id: int, k: int = 30, n_rec: int = 5):
    if user_id not in ui.index:
        print(f"User {user_id} not found!");  return
    print(f"\n🎯 Top {n_rec} Recommendations for User {user_id}:\n")

    # movies the user hasn’t rated
    unrated = ui.loc[user_id][ui.loc[user_id].isna()].index
    if len(unrated) == 0:
        print("User has rated every movie!"); return

    # take k most‑similar users with positive similarity
    sims = sim_df.loc[user_id].drop(user_id)
    top_neighbors = sims[sims > 0].nlargest(k)

    preds = {}
    for m in unrated:
        # ratings from neighbours who rated this movie
        neigh_ratings = ui.loc[top_neighbors.index, m].dropna()
        if neigh_ratings.empty:          # nobody rated → skip / fallback
            continue
        weights = top_neighbors.loc[neigh_ratings.index]
        if weights.sum() == 0:           # all similarities zero?
            continue
        preds[m] = np.dot(weights, neigh_ratings) / weights.sum()

    if not preds:
        print("Couldn’t generate predictions (try another user or larger k).")
        return

    # top N
    top = sorted(preds.items(), key=lambda x: x[1], reverse=True)[:n_rec]
    for mid, score in top:
        title = movies.loc[movies.movieId == mid, 'title'].values[0]
        print(f"{title}  (Pred ≈ {score:.2f})")



In [11]:
# ---------- 4. RUN ----------
recommend_for(user_id=1,k=30,n_rec=5)


🎯 Top 5 Recommendations for User 1:

Little Women (1994)  (Pred ≈ 5.00)
Cat Ballou (1965)  (Pred ≈ 5.00)
Copycat (1995)  (Pred ≈ 5.00)
Dumb & Dumber (1994)  (Pred ≈ 5.00)
Rudy (1993)  (Pred ≈ 5.00)
