In [1]:
import random

import numpy as np
import pandas as pd
import pytorch_lightning as pl
import torch
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.loggers import TensorBoardLogger
from torch.utils.data import DataLoader

from recommender.models import Recommender
from recommender.data_processing import get_context, pad_list, map_column, MASK, PAD


2024-05-03 23:25:32.886843: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE3 SSE4.1 SSE4.2 AVX AVX2 AVX_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-05-03 23:25:33.211356: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


In [9]:
data_csv_path = "../../data/ml-25m/ratings.csv"
movies_path = "../../data/ml-25m/movies.csv"

model_path = "../recommender_models/recommender_10epochs.ckpt"

In [4]:
data = pd.read_csv(data_csv_path)
movies = pd.read_csv(movies_path)


In [5]:
data.sort_values(by="timestamp", inplace=True)

In [6]:
data, mapping, inverse_mapping = map_column(data, col_name="movieId")
grp_by_train = data.groupby(by="userId")

In [7]:
random.sample(list(grp_by_train.groups), k=10)

[14253, 132723, 152750, 117471, 112460, 145038, 29927, 60143, 146684, 23902]

In [10]:
model = Recommender(
        vocab_size=len(mapping) + 2,
        lr=1e-4,
        dropout=0.3,
    )
model.eval()
model.load_state_dict(torch.load(model_path)["state_dict"])

<All keys matched successfully>

In [11]:
movie_to_idx = {a: mapping[b] for a, b in zip(movies.title.tolist(), movies.movieId.tolist()) if b in mapping}
idx_to_movie = {v: k for k, v in movie_to_idx.items()}

In [12]:
def predict(list_movies, model, movie_to_idx, idx_to_movie):
    
    ids = [PAD] * (120 - len(list_movies) - 1) + [movie_to_idx[a] for a in list_movies] + [MASK]
    
    src = torch.tensor(ids, dtype=torch.long).unsqueeze(0)
    
    with torch.no_grad():
        prediction = model(src)
    
    masked_pred = prediction[0, -1].numpy()
    
    sorted_predicted_ids = np.argsort(masked_pred).tolist()[::-1]
    
    sorted_predicted_ids = [a for a in sorted_predicted_ids if a not in ids]
    
    return [idx_to_movie[a] for a in sorted_predicted_ids[:30] if a in idx_to_movie]


### Senario 1: Adventure/Fantasy 

In [14]:
list_movies = ["Harry Potter and the Sorcerer's Stone (a.k.a. Harry Potter and the Philosopher's Stone) (2001)",
               "Harry Potter and the Chamber of Secrets (2002)",
               "Harry Potter and the Prisoner of Azkaban (2004)",
               "Harry Potter and the Goblet of Fire (2005)"]

top_movie = predict(list_movies, model, movie_to_idx, idx_to_movie)
top_movie

['Shrek 2 (2004)',
 'Ratatouille (2007)',
 'Ice Age (2002)',
 "Pirates of the Caribbean: Dead Man's Chest (2006)",
 'Harry Potter and the Order of the Phoenix (2007)',
 'Harry Potter and the Deathly Hallows: Part 1 (2010)',
 'Harry Potter and the Half-Blood Prince (2009)',
 'Up (2009)',
 'Spider-Man 2 (2004)',
 'Star Wars: Episode III - Revenge of the Sith (2005)',
 'Iron Man (2008)',
 'X2: X-Men United (2003)',
 'Avatar (2009)',
 '300 (2007)',
 'WALL·E (2008)',
 'Incredibles, The (2004)',
 'I, Robot (2004)',
 'Matrix Revolutions, The (2003)',
 'Avengers, The (2012)',
 'Toy Story 3 (2010)',
 'Star Wars: Episode II - Attack of the Clones (2002)',
 'Juno (2007)',
 'Bourne Supremacy, The (2004)',
 'Chicken Run (2000)',
 'Chronicles of Narnia: The Lion, the Witch and the Wardrobe, The (2005)',
 'Men in Black II (a.k.a. MIIB) (a.k.a. MIB 2) (2002)',
 'Harry Potter and the Deathly Hallows: Part 2 (2011)',
 'Bruce Almighty (2003)',
 'Spirited Away (Sen to Chihiro no kamikakushi) (2001)',
 'La

### Senario 2:  Action/Adventure

In [15]:
list_movies = ["Black Panther (2017)",
               "Avengers, The (2012)",
               "Avengers: Infinity War - Part I (2018)",
               "Logan (2017)",
               "Spider-Man (2002)",
               "Spider-Man 3 (2007)",
               "Spider-Man: Far from Home (2019)"]

top_movie = predict(list_movies, model, movie_to_idx, idx_to_movie)
top_movie

['Iron Man (2008)',
 'Guardians of the Galaxy (2014)',
 'Iron Man 2 (2010)',
 'Interstellar (2014)',
 'Guardians of the Galaxy 2 (2017)',
 'Deadpool (2016)',
 'The Martian (2015)',
 'Harry Potter and the Prisoner of Azkaban (2004)',
 'Thor: Ragnarok (2017)',
 'Avatar (2009)',
 'I, Robot (2004)',
 'Rogue One: A Star Wars Story (2016)',
 'X2: X-Men United (2003)',
 "Harry Potter and the Sorcerer's Stone (a.k.a. Harry Potter and the Philosopher's Stone) (2001)",
 'Ant-Man (2015)',
 'Kingsman: The Secret Service (2015)',
 'Harry Potter and the Chamber of Secrets (2002)',
 'Avengers: Age of Ultron (2015)',
 'Dark Knight, The (2008)',
 'Star Wars: Episode III - Revenge of the Sith (2005)',
 'Star Wars: Episode VII - The Force Awakens (2015)',
 'Captain America: Civil War (2016)',
 "Pirates of the Caribbean: Dead Man's Chest (2006)",
 'Captain America: The Winter Soldier (2014)',
 'Edge of Tomorrow (2014)',
 'Matrix Revolutions, The (2003)',
 'Inception (2010)',
 'Harry Potter and the Order o

### Senario 3: Comedy

In [12]:
list_movies = ["Zootopia (2016)",
               "Toy Story 3 (2010)",
               "Toy Story 4 (2019)",
               "Finding Nemo (2003)",
               "Ratatouille (2007)",
               "The Lego Movie (2014)",
               "Ghostbusters (a.k.a. Ghost Busters) (1984)",
               "Ace Ventura: When Nature Calls (1995)"]
top_movie = predict(list_movies, model, movie_to_idx, idx_to_movie)
top_movie

['Home Alone (1990)',
 "Bug's Life, A (1998)",
 'Toy Story 2 (1999)',
 'Nightmare Before Christmas, The (1993)',
 'Babe (1995)',
 'Inside Out (2015)',
 'Mask, The (1994)',
 'Toy Story (1995)',
 'Back to the Future (1985)',
 'Back to the Future Part II (1989)',
 'Simpsons Movie, The (2007)',
 'Forrest Gump (1994)',
 'Austin Powers: International Man of Mystery (1997)',
 'Monty Python and the Holy Grail (1975)',
 'Cars (2006)',
 'Kung Fu Panda (2008)',
 'Groundhog Day (1993)',
 'American Pie (1999)',
 'Men in Black (a.k.a. MIB) (1997)',
 'Dumb & Dumber (Dumb and Dumber) (1994)',
 'Back to the Future Part III (1990)',
 'Big Hero 6 (2014)',
 'Mrs. Doubtfire (1993)',
 'Clueless (1995)',
 'Bruce Almighty (2003)',
 'Corpse Bride (2005)',
 'Deadpool (2016)',
 'Up (2009)',
 "Ferris Bueller's Day Off (1986)"]