In [2]:
import random
import json
import numpy as np
import pandas as pd
import pytorch_lightning as pl
import torch
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.loggers import TensorBoardLogger
from torch.utils.data import DataLoader

from recommender.models import Recommender
from recommender.data_processing import get_context, pad_list, map_column, MASK, PAD


In [3]:
import torch
tensor = torch.rand(3,4)
print(f"Device tensor is stored on: {tensor.device}")
# Device tensor is stored on: cpu

print(torch.cuda.is_available())
#True

tensor = tensor.to('cuda')
print(f"Device tensor is stored on: {tensor.device}")
# Device tensor is stored on: cuda:0

Device tensor is stored on: cpu
True
Device tensor is stored on: cuda:0


In [4]:
# 檔案路徑
data_csv_path = "../ratings.csv"
movies_path = "../movies.csv"
# 檢查是否是最新的check point
model_path = "../recommender/recommender_models/recommender-v4.ckpt" 

In [5]:
data = pd.read_csv(data_csv_path)
movies = pd.read_csv(movies_path)


In [6]:
data.sort_values(by="timestamp", inplace=True)

In [7]:
data, mapping, inverse_mapping = map_column(data, col_name="movieId")
grp_by_train = data.groupby(by="userId")

In [8]:
random.sample(list(grp_by_train.groups), k=10)

[31421, 56659, 50570, 61852, 96738, 113957, 139290, 46742, 55935, 32730]

In [9]:
model = Recommender(
        vocab_size=len(mapping) + 2,
        lr=1e-4,
        dropout=0.3,
    )
model.eval()
model.load_state_dict(torch.load(model_path)["state_dict"])

<All keys matched successfully>

In [10]:
movie_to_idx = {a: mapping[b] for a, b in zip(movies.title.tolist(), movies.movieId.tolist()) if b in mapping}
idx_to_movie = {v: k for k, v in movie_to_idx.items()}

In [11]:
def predict(list_movies, model, movie_to_idx, idx_to_movie):
    
    ids = [PAD] * (120 - len(list_movies) - 1) + [movie_to_idx[a] for a in list_movies] + [MASK]
    
    src = torch.tensor(ids, dtype=torch.long).unsqueeze(0)
    
    with torch.no_grad():
        prediction = model(src)
    
    masked_pred = prediction[0, -1].numpy()
    
    sorted_predicted_ids = np.argsort(masked_pred).tolist()[::-1]
    
    sorted_predicted_ids = [a for a in sorted_predicted_ids if a not in ids]
    
    return [idx_to_movie[a] for a in sorted_predicted_ids[:30] if a in idx_to_movie]


### Senario 1: Adventure/Fantasy 

In [12]:
list_movies = ["Harry Potter and the Sorcerer's Stone (a.k.a. Harry Potter and the Philosopher's Stone) (2001)",
               "Harry Potter and the Chamber of Secrets (2002)",
               "Harry Potter and the Prisoner of Azkaban (2004)",
               "Harry Potter and the Goblet of Fire (2005)"]

top_movie = predict(list_movies, model, movie_to_idx, idx_to_movie)
top_movie

['Ice Age (2002)',
 "Pirates of the Caribbean: Dead Man's Chest (2006)",
 'Shrek 2 (2004)',
 'Harry Potter and the Order of the Phoenix (2007)',
 '300 (2007)',
 'Avatar (2009)',
 'Ratatouille (2007)',
 'Star Wars: Episode III - Revenge of the Sith (2005)',
 'Up (2009)',
 'I, Robot (2004)',
 'Juno (2007)',
 'Harry Potter and the Half-Blood Prince (2009)',
 'Avengers, The (2012)',
 "Bridget Jones's Diary (2001)",
 'Bruce Almighty (2003)',
 'X2: X-Men United (2003)',
 'Iron Man (2008)',
 'Big Fish (2003)',
 'Matrix Revolutions, The (2003)',
 'Spider-Man 2 (2004)',
 'Shaun of the Dead (2004)',
 'Dark Knight Rises, The (2012)',
 'Interstellar (2014)',
 'Harry Potter and the Deathly Hallows: Part 1 (2010)',
 'Spirited Away (Sen to Chihiro no kamikakushi) (2001)',
 'Slumdog Millionaire (2008)',
 'Moulin Rouge (2001)',
 '40-Year-Old Virgin, The (2005)',
 'Men in Black II (a.k.a. MIIB) (a.k.a. MIB 2) (2002)',
 'Pianist, The (2002)']

### Senario 2:  Action/Adventure

In [13]:
list_movies = ["Black Panther (2017)",
               "Avengers, The (2012)",
               "Avengers: Infinity War - Part I (2018)",
               "Logan (2017)",
               "Spider-Man (2002)",
               "Spider-Man 3 (2007)",
               "Spider-Man: Far from Home (2019)"]

top_movie = predict(list_movies, model, movie_to_idx, idx_to_movie)
top_movie

['Thor (2011)',
 'Thor: Ragnarok (2017)',
 'Avengers: Infinity War - Part II (2019)',
 'X-Men Origins: Wolverine (2009)',
 'Captain America: The First Avenger (2011)',
 'Iron Man (2008)',
 'Untitled Spider-Man Reboot (2017)',
 'X-Men: First Class (2011)',
 'Deadpool 2 (2018)',
 'Incredibles 2 (2018)',
 'Iron Man 2 (2010)',
 'Spider-Man 2 (2004)',
 'Avatar (2009)',
 'X2: X-Men United (2003)',
 'Inception (2010)',
 'Captain America: The Winter Soldier (2014)',
 'Guardians of the Galaxy 2 (2017)',
 'Batman Begins (2005)',
 'Spider-Man: Into the Spider-Verse (2018)',
 'Dark Knight, The (2008)',
 'X-Men (2000)',
 'Thor: The Dark World (2013)',
 'Ant-Man and the Wasp (2018)',
 'Matrix, The (1999)',
 'Coco (2017)',
 'Incredibles, The (2004)',
 'Dark Knight Rises, The (2012)',
 'Guardians of the Galaxy (2014)',
 'Rogue One: A Star Wars Story (2016)',
 'Pirates of the Caribbean: The Curse of the Black Pearl (2003)']

### Senario 3: Comedy

In [14]:
list_movies = ["Zootopia (2016)",
               "Toy Story 3 (2010)",
               "Toy Story 4 (2019)",
               "Finding Nemo (2003)",
               "Ratatouille (2007)",
               "The Lego Movie (2014)",
               "Ghostbusters (a.k.a. Ghost Busters) (1984)",
               "Ace Ventura: When Nature Calls (1995)"]
top_movie = predict(list_movies, model, movie_to_idx, idx_to_movie)
top_movie

['Moana (2016)',
 'Guardians of the Galaxy 2 (2017)',
 'Nightmare Before Christmas, The (1993)',
 'Inside Out (2015)',
 'Up (2009)',
 'Breakfast Club, The (1985)',
 'Toy Story (1995)',
 "Ferris Bueller's Day Off (1986)",
 'Coco (2017)',
 'Finding Dory (2016)',
 "Bug's Life, A (1998)",
 'Pitch Perfect (2012)',
 'Clueless (1995)',
 'Untitled Spider-Man Reboot (2017)',
 'Deadpool (2016)',
 'Big Hero 6 (2014)',
 'Toy Story 2 (1999)',
 'Thor: Ragnarok (2017)',
 'Groundhog Day (1993)',
 'Forrest Gump (1994)',
 'Easy A (2010)',
 'Back to the Future Part II (1989)',
 'Scott Pilgrim vs. the World (2010)',
 'Austin Powers: International Man of Mystery (1997)',
 'Wonder Woman (2017)',
 'How to Train Your Dragon (2010)',
 '21 Jump Street (2012)',
 'Beauty and the Beast (1991)',
 'Monsters, Inc. (2001)']