In [3]:
# --- Setup
import os, json, math, random
from datetime import datetime

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F

# Import your NeuMF model
from models.models import NeuMF

# Device setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# --- Define paths ---
DATA_DIR = "data"
RESULTS_DIR = "results"

RATINGS_PATH   = os.path.join(DATA_DIR, "ratings_reindexed.csv")
REV_IIDS_PATH  = os.path.join(DATA_DIR, "rev_movieids.csv")
MODEL_PATH     = os.path.join(RESULTS_DIR, "neumf_model.pth")

# --- Reproducibility ---
random.seed(42)
np.random.seed(42)
torch.manual_seed(42)


Using device: cpu


<torch._C.Generator at 0x139a9b5d0>

In [5]:
# --- User Signup Simulation ---

print("🧾 Please enter your details to sign up:\n")

username   = input("Enter a username: ").strip()
first_name = input("Enter your first name: ").strip()
last_name  = input("Enter your last name: ").strip()
email      = input("Enter your email: ").strip()
password   = input("Enter your password: ").strip()

print("\n✅ Signup details received:")
print(f"Username: {username}")
print(f"Name: {first_name} {last_name}")
print(f"Email: {email}")

🧾 Please enter your details to sign up:



KeyboardInterrupt: Interrupted by user

In [None]:
# --- Step 2: Onboarding – Show Top Interacted Movies and Ask for Likes/Dislikes ---
print("Before we personalize your recommendations, please like or dislike at least 4 movies.\n")

# Load data
ratings_df = pd.read_csv(RATINGS_PATH)
rev_movieids = pd.read_csv(REV_IIDS_PATH)  # columns: original_movieId, reindexed_movieId, title, genres

# Find most popular (most-interacted) movies
top_movies = (
    ratings_df.groupby("movieId")
    .agg(avg_rating=("rating", "mean"), num_ratings=("rating", "count"))
    .sort_values("num_ratings", ascending=False)
    .head(20)
    .reset_index()
)

# Merge with rev_movieids to get titles/genres
top_movies = top_movies.merge(
    rev_movieids,
    left_on="movieId",
    right_on="reindexed_movieId",
    how="left"
)[["reindexed_movieId", "title", "genres", "avg_rating", "num_ratings"]].dropna(subset=["title"]).reset_index(drop=True)

top_movies

# --- Interaction: ask user for likes/dislikes ---
liked_items = []
disliked_items = []

print("🎞️ Please respond with 'l' to like or 'd' to dislike.\n")

for idx, row in top_movies.iterrows():
    print(f"{idx+1:2d}. {row['title']} ({row['genres']})")
    while True:
        response = input("   👉 Like or Dislike (l/d)? ").strip().lower()
        if response in ["l", "d"]:
            break
        else:
            print("   ⚠️ Please enter 'l' to like or 'd' to dislike.")
    if response == "l":
        liked_items.append(int(row["reindexed_movieId"]))
    else:
        disliked_items.append(int(row["reindexed_movieId"]))

    # Require at least 4 total responses
    if len(liked_items) + len(disliked_items) >= 4:
        cont = input("Continue rating more movies? (y/n): ").strip().lower()
        if cont == "n":
            break

print("\n✅ Thanks for your responses!")
print(f"Liked movies: {liked_items}")
print(f"Disliked movies: {disliked_items}")


Before we personalize your recommendations, please like or dislike at least 4 movies.



Unnamed: 0,reindexed_movieId,title,genres,avg_rating,num_ratings
0,20,Forrest Gump (1994),Comedy|Drama|Romance|War,4.164134,329
1,232,"Shawshank Redemption, The (1994)",Crime|Drama,4.429022,317
2,16,Pulp Fiction (1994),Comedy|Crime|Drama|Thriller,4.197068,307
3,34,"Silence of the Lambs, The (1991)",Crime|Horror|Thriller,4.16129,279
4,166,"Matrix, The (1999)",Action|Sci-Fi|Thriller,4.192446,278
5,15,Star Wars: Episode IV - A New Hope (1977),Action|Adventure|Sci-Fi,4.231076,251
6,26,Jurassic Park (1993),Action|Adventure|Sci-Fi|Thriller,3.75,238
7,7,Braveheart (1995),Action|Drama|War,4.031646,237
8,478,Terminator 2: Judgment Day (1991),Action|Sci-Fi,3.970982,224
9,28,Schindler's List (1993),Drama|War,4.225,220


In [None]:
# --- Step 3: Add New User, Save Interactions, Update Model ---
import json
from scripts.db_helper import add_user  # ✅ helper that inserts new user into database
from models.models import NeuMF
import torch.nn as nn

# --- Load current data and model ---
ratings_df = pd.read_csv(RATINGS_PATH)
rev_movieids = pd.read_csv(REV_IIDS_PATH)

# Determine the next user index
new_user_id = int(ratings_df["userId"].max()) + 1
print(f"\n🆕 Next user ID → {new_user_id}")

# --- Add user to database ---
favorites_json = json.dumps(liked_items)
add_user(username, password, first_name, last_name, email, favorites_json)
print("✅ User added to database successfully.")

# --- Add or update the interactions in ratings_reindexed.csv ---
def update_ratings_csv(ratings_df, user_id, liked, disliked):
    """Safely add or update likes/dislikes in ratings_reindexed.csv"""
    new_entries = []
    
    # Handle likes
    for mid in liked:
        if ((ratings_df["userId"] == user_id) & (ratings_df["movieId"] == mid)).any():
            # Update existing rating
            ratings_df.loc[
                (ratings_df["userId"] == user_id) & (ratings_df["movieId"] == mid),
                ["rating", "label"]
            ] = [5.0, 1]
        else:
            new_entries.append([user_id, mid, 5.0, 1])
    
    # Handle dislikes
    for mid in disliked:
        if ((ratings_df["userId"] == user_id) & (ratings_df["movieId"] == mid)).any():
            ratings_df.loc[
                (ratings_df["userId"] == user_id) & (ratings_df["movieId"] == mid),
                ["rating", "label"]
            ] = [3.0, 0]
        else:
            new_entries.append([user_id, mid, 3.0, 0])
    
    if new_entries:
        new_df = pd.DataFrame(new_entries, columns=["userId", "movieId", "rating", "label"])
        ratings_df = pd.concat([ratings_df, new_df], ignore_index=True)
    
    # Save updated ratings
    ratings_df.to_csv(RATINGS_PATH, index=False)
    return ratings_df

ratings_df = update_ratings_csv(ratings_df, new_user_id, liked_items, disliked_items)
print("💾 Updated ratings_reindexed.csv successfully.")

# --- Load trained NeuMF model ---
n_users_old = ratings_df["userId"].nunique() - 1
n_items = ratings_df["movieId"].nunique()

model = NeuMF(n_users_old, n_items, k_gmf=32, k_mlp=32, mlp_layers=(64, 32, 16)).to(device)
model.load_state_dict(torch.load(MODEL_PATH, map_location=device))
model.eval()

prev_user_count = model.ug.num_embeddings
print(f"🧠 Model before resizing: {prev_user_count} users")

# --- Find closest user by average rating ---
user_means = ratings_df.groupby("userId")["rating"].mean()
closest_user = abs(user_means - user_means.loc[new_user_id]).sort_values().index[1]
print(f"🔍 Closest existing user to {new_user_id}: user {closest_user}")

# --- Expand NeuMF embeddings ---
with torch.no_grad():
    new_ug = torch.cat([model.ug.weight.data, model.ug.weight.data[closest_user].unsqueeze(0)], dim=0)
    new_um = torch.cat([model.um.weight.data, model.um.weight.data[closest_user].unsqueeze(0)], dim=0)
    model.ug = nn.Embedding.from_pretrained(new_ug, freeze=False)
    model.um = nn.Embedding.from_pretrained(new_um, freeze=False)

torch.save(model.state_dict(), MODEL_PATH)
print("✅ Model resized and saved with new user embedding.")

# --- Verification Summary ---
print("\n🔎 Verification Summary:")
print(f"   Added user ID: {new_user_id}")
print(f"   Closest matched user: {closest_user}")
print(f"   Embedding count before: {prev_user_count} → after: {model.ug.num_embeddings}")
print(f"   Alignment check: {'✅ OK' if new_user_id == model.ug.num_embeddings - 1 else '⚠️ Mismatch!'}")
print(f"   Favorites stored: {favorites_json}")



🆕 Next user ID → 610
✅ User added to database successfully.
💾 Updated ratings_reindexed.csv successfully.
🧠 Model before resizing: 610 users
🔍 Closest existing user to 610: user 347
✅ Model resized and saved with new user embedding.

🔎 Verification Summary:
   Added user ID: 610
   Closest matched user: 347
   Embedding count before: 610 → after: 611
   Alignment check: ✅ OK
   Favorites stored: [20, 232, 34, 166, 15]


In [15]:
# =====================================================
# 🎬 MAIN DASHBOARD — POPULAR, FAVORITES, RECOMMENDATIONS
# =====================================================

import os
import json
import torch
import pandas as pd

# ---- Project imports ----
from models.models import NeuMF
from scripts.db_helper import get_user, get_user_favorites  # ✅ from your helper file

# ---- Paths ----
DATA_DIR     = "data"
RESULTS_DIR  = "results"
MODEL_PATH   = os.path.join(RESULTS_DIR, "neumf_model.pth")
RATINGS_PATH = os.path.join(DATA_DIR, "ratings_reindexed.csv")
REV_PATH     = os.path.join(DATA_DIR, "rev_movieids.csv")

# ---- Device ----
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"⚙️ Using device: {device}")

# =====================================================
# 1️⃣ USER LOGIN
# =====================================================

print("🔐 Please log in to continue\n")
username = input("Enter username: ").strip()
password = input("Enter password: ").strip()

user = get_user(username)

if not user or user["password"] != password:
    raise SystemExit("❌ Invalid username or password. Please try again.")

user_id = user["id"]
first_name = user["first_name"]
last_name = user["last_name"]
print(f"\n✅ Welcome back, {first_name} {last_name}! (User ID = {user_id})")

# ---- Favorites ----
favorites = get_user_favorites(user_id)
print(f"💖 You currently have {len(favorites)} favorites: {favorites}")

# =====================================================
# 2️⃣ LOAD DATA + MODEL
# =====================================================

ratings_df = pd.read_csv(RATINGS_PATH)
rev_movies = pd.read_csv(REV_PATH)

n_users = ratings_df["userId"].nunique()
n_items = ratings_df["movieId"].nunique()

# Load saved NeuMF model
model = NeuMF(n_users, n_items, k_gmf=32, k_mlp=32, mlp_layers=(64,32,16)).to(device)
model.load_state_dict(torch.load(MODEL_PATH, map_location=device))
model.eval()

# =====================================================
# 3️⃣ POPULAR MOVIES (Top 30)
# =====================================================

top_popular = (
    ratings_df.groupby("movieId")
    .agg(avg_rating=("rating", "mean"), num_ratings=("rating", "count"))
    .sort_values("num_ratings", ascending=False)
    .head(30)
    .reset_index()
)

top_popular = top_popular.merge(
    rev_movies[["reindexed_movieId", "title", "genres"]],
    left_on="movieId",
    right_on="reindexed_movieId",
    how="left"
).dropna(subset=["title"])

print("\n🍿 TOP 30 POPULAR MOVIES:")
for _, row in top_popular.iterrows():
    print(f"   🎞️ {row['title']}  ({row['genres']})")

# =====================================================
# 4️⃣ FAVORITES TAB
# =====================================================

# =====================================================
# 4️⃣ FAVORITES TAB (fixed for string issue)
# =====================================================

raw_favorites = favorites

# Force-convert favorites into a real list
if isinstance(raw_favorites, str):
    try:
        favorites = json.loads(raw_favorites)
    except json.JSONDecodeError:
        favorites = []
elif isinstance(raw_favorites, (tuple, list)):
    # Sometimes DB returns a tuple like ('[20, 34]',)
    if len(raw_favorites) == 1 and isinstance(raw_favorites[0], str):
        try:
            favorites = json.loads(raw_favorites[0])
        except json.JSONDecodeError:
            favorites = []
    else:
        favorites = list(raw_favorites)
else:
    favorites = []

print(f"\n💖 You have {len(favorites)} favorites: {favorites}")

if favorites:
    fav_df = rev_movies[rev_movies["reindexed_movieId"].isin(favorites)]
    print("\n💖 YOUR FAVORITES:")
    for _, row in fav_df.iterrows():
        print(f"   ⭐ {row['title']}  ({row['genres']})")
else:
    print("\n💖 You have no favorites yet. Like some movies to fill this list!")

# =====================================================
# 5️⃣ RECOMMENDATION TAB (via NeuMF)
# =====================================================

def recommend_movies_for_user(model, user_id, top_k=10):
    all_items = torch.arange(model.ig.weight.size(0)).to(device)
    model.eval()
    with torch.no_grad():
        u_gmf = model.ug(torch.tensor([user_id]).to(device))
        u_mlp = model.um(torch.tensor([user_id]).to(device))
        g = u_gmf * model.ig(all_items)
        m = model.mlp(torch.cat([u_mlp.expand_as(model.im(all_items)), model.im(all_items)], dim=1))
        scores = model.fc(torch.cat([g, m], dim=1)).view(-1)
    return scores.topk(top_k).indices.cpu().numpy().tolist()

recommended_ids = recommend_movies_for_user(model, user_id, top_k=10)
rec_df = rev_movies[rev_movies["reindexed_movieId"].isin(recommended_ids)]

print("\n🎯 RECOMMENDED FOR YOU:")
for _, row in rec_df.iterrows():
    print(f"   🎬 {row['title']}  ({row['genres']})")

print("\n✅ Dashboard loaded successfully!")


⚙️ Using device: cpu
🔐 Please log in to continue


✅ Welcome back, Ghorpade shreyasghorpade@gmail.com! (User ID = 610)
💖 You currently have 22 favorites: [20, 232, 34, 166, 15]

🍿 TOP 30 POPULAR MOVIES:
   🎞️ Forrest Gump (1994)  (Comedy|Drama|Romance|War)
   🎞️ Shawshank Redemption, The (1994)  (Crime|Drama)
   🎞️ Pulp Fiction (1994)  (Comedy|Crime|Drama|Thriller)
   🎞️ Silence of the Lambs, The (1991)  (Crime|Horror|Thriller)
   🎞️ Matrix, The (1999)  (Action|Sci-Fi|Thriller)
   🎞️ Star Wars: Episode IV - A New Hope (1977)  (Action|Adventure|Sci-Fi)
   🎞️ Jurassic Park (1993)  (Action|Adventure|Sci-Fi|Thriller)
   🎞️ Braveheart (1995)  (Action|Drama|War)
   🎞️ Terminator 2: Judgment Day (1991)  (Action|Sci-Fi)
   🎞️ Schindler's List (1993)  (Drama|War)
   🎞️ Fight Club (1999)  (Action|Crime|Drama|Thriller)
   🎞️ Toy Story (1995)  (Adventure|Animation|Children|Comedy|Fantasy)
   🎞️ Star Wars: Episode V - The Empire Strikes Back (1980)  (Action|Adventure|Sci-Fi)
   🎞️ American Beauty (