In [40]:
import random
import json
import numpy as np
import pandas as pd
import pytorch_lightning as pl
import torch
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.loggers import TensorBoardLogger
from torch.utils.data import DataLoader

from recommender.models import Recommender
from recommender.data_processing import get_context, pad_list, map_column, MASK, PAD


In [41]:
import torch
tensor = torch.rand(3,4)
print(f"Device tensor is stored on: {tensor.device}")
# Device tensor is stored on: cpu

print(torch.cuda.is_available())
#True

tensor = tensor.to('cuda')
print(f"Device tensor is stored on: {tensor.device}")
# Device tensor is stored on: cuda:0

Device tensor is stored on: cpu
True
Device tensor is stored on: cuda:0


In [42]:
# 檔案路徑
# data_csv_path = "../ratings.csv"
# movies_path = "../movies.csv"
data_csv_path = "./netflix_prize_dataset/netflix_data_25M.csv"
movies_path = "./netflix_prize_dataset/netflix_movie.csv"
# 檢查是否是最新的check point
model_path = "./recommender_models/recommender.ckpt" 

In [44]:
data = pd.read_csv(data_csv_path)
movies = pd.read_csv(movies_path)


In [45]:
data.sort_values(by="date", inplace=True)

In [46]:
data, mapping, inverse_mapping = map_column(data, col_name="movieId")
grp_by_train = data.groupby(by="userId")

In [47]:
random.sample(list(grp_by_train.groups), k=10)

[376884,
 546959,
 238560,
 267562,
 333729,
 377310,
 635797,
 437514,
 235364,
 411873]

In [48]:
model = Recommender(
        vocab_size=len(mapping) + 2,
        lr=1e-4,
        dropout=0.3,
    )
model.eval()
model.load_state_dict(torch.load(model_path)["state_dict"])

<All keys matched successfully>

In [49]:
movie_to_idx = {a: mapping[b] for a, b in zip(movies.title.tolist(), movies.movieId.tolist()) if b in mapping}
idx_to_movie = {v: k for k, v in movie_to_idx.items()}

In [106]:
def NormalizeData(data):
    return (data - np.min(data)) / (np.max(data) - np.min(data))

def predict(list_movies, model, movie_to_idx, idx_to_movie):

    if type(list_movies[0]) == str:
        ids = [PAD] * (120 - len(list_movies) - 1) + [movie_to_idx[a] for a in list_movies] + [MASK]
    else:
        ids = [PAD] * (120 - len(list_movies) - 1) + list_movies + [MASK]
    
    src = torch.tensor(ids, dtype=torch.long).unsqueeze(0)
    
    with torch.no_grad():
        prediction = model(src)
    
    masked_pred = prediction[0, -1].numpy()

    masked_pred = NormalizeData(masked_pred)
    # precision = sorted(masked_pred, reverse=True)
    # for precision

    sorted_predicted_ids = np.argsort(masked_pred).tolist()[::-1]
    
    sorted_predicted_ids = [a for a in sorted_predicted_ids if a not in ids]
    
    return [idx_to_movie[a] for a in sorted_predicted_ids[:30] if a in idx_to_movie], [masked_pred[ids] for ids in sorted_predicted_ids[:30]]


### Senario 1: Adventure/Fantasy 

In [14]:
# list_movies = ["Harry Potter and the Sorcerer's Stone (a.k.a. Harry Potter and the Philosopher's Stone) (2001)",
            #    "Harry Potter and the Chamber of Secrets (2002)",
            #    "Harry Potter and the Prisoner of Azkaban (2004)",
            #    "Harry Potter and the Goblet of Fire (2005)"]
list_movies=["Harry Potter and the Prisoner of Azkaban: Bonus Material",
            "Discovering the Real World of Harry Potter",
            "Harry Potter and the Chamber of Secrets",
            "Harry Potter and the Prisoner of Azkaban"]
top_movie = predict(list_movies, model, movie_to_idx, idx_to_movie)
top_movie

['Shrek 2',
 'Finding Nemo (Widescreen)',
 'Shrek (Full-screen)',
 'Spider-Man 2',
 'Shark Tale',
 'The Incredibles',
 "Harry Potter and the Sorcerer's Stone",
 'The Bourne Supremacy',
 'SpongeBob SquarePants: The Movie',
 'Garfield: The Movie',
 'Ice Age',
 'Collateral',
 'Dodgeball: A True Underdog Story',
 'Pirates of the Caribbean: The Curse of the Black Pearl',
 'The Terminal',
 'The Day After Tomorrow',
 'Troy',
 'Elf',
 'The Manchurian Candidate',
 'The Princess Diaries 2: Royal Engagement',
 'Napoleon Dynamite',
 'The Notebook',
 'Lord of the Rings: The Return of the King',
 'Daddy Day Care',
 'Anchorman: The Legend of Ron Burgundy',
 'Aladdin: Platinum Edition',
 'The Chronicles of Riddick',
 "A Bug's Life"]

### Senario 2:  Action/Adventure

In [15]:
# list_movies = ["Black Panther (2017)",
#                "Avengers, The (2012)",
#                "Avengers: Infinity War - Part I (2018)",
#                "Logan (2017)",
#                "Spider-Man (2002)",
#                "Spider-Man 3 (2007)",
#                "Spider-Man: Far from Home (2019)"]
list_movies = ["Spider-Man: The Return of the Green Goblin",
               "Spider-Man",
               "Spider-Man: The Venom Saga",
               "Spider-Man 2: Bonus Material",
               "X-Men: Evolution: Season 2"
]
top_movie = predict(list_movies, model, movie_to_idx, idx_to_movie)
top_movie

['Daredevil vs. Spiderman',
 'Scooby-Doo Meets Batman',
 'Pokemon: Mewtwo Returns',
 'Scooby-Doo',
 'Ice Age',
 'SpongeBob SquarePants: Halloween',
 'SpongeBob SquarePants: Nautical Nonsense / Sponge Buddies',
 'Jimmy Neutron: Boy Genius',
 'Scooby-Doo and the Legend of the Vampire',
 'Stuart Little 2',
 'Scooby-Doo and the Reluctant Werewolf',
 'SpongeBob SquarePants: Tales From the Deep',
 'Scooby-Doo and the Ghoul School',
 'SpongeBob SquarePants: Sea Stories',
 'Scooby-Doo and the Alien Invaders',
 'Schoolhouse Rock!: Special 30th Anniversary Edition',
 'Justice League',
 "Scooby-Doo's Spookiest Tales",
 'Harry Potter and the Chamber of Secrets',
 'Pokemon 3: The Movie',
 'Justice League: Justice on Trial',
 'Lord of the Rings: The Fellowship of the Ring',
 'Spirit: Stallion of the Cimarron',
 'Scooby-Doo and the Cyber Chase',
 'Scooby-Doo Meets the Harlem Globetrotters',
 'Transformers: The Movie',
 'Snow Dogs',
 'Scooby-Doo Goes Hollywood',
 'My Big Fat Greek Wedding']

### Senario 3: Comedy

In [14]:
# list_movies = ["Zootopia (2016)",
#                "Toy Story 3 (2010)",
#                "Toy Story 4 (2019)",
#                "Finding Nemo (2003)",
#                "Ratatouille (2007)",
#                "The Lego Movie (2014)",
#                "Ghostbusters (a.k.a. Ghost Busters) (1984)",
#                "Ace Ventura: When Nature Calls (1995)"]
# top_movie = predict(list_movies, model, movie_to_idx, idx_to_movie)
# top_movie

['Moana (2016)',
 'Guardians of the Galaxy 2 (2017)',
 'Nightmare Before Christmas, The (1993)',
 'Inside Out (2015)',
 'Up (2009)',
 'Breakfast Club, The (1985)',
 'Toy Story (1995)',
 "Ferris Bueller's Day Off (1986)",
 'Coco (2017)',
 'Finding Dory (2016)',
 "Bug's Life, A (1998)",
 'Pitch Perfect (2012)',
 'Clueless (1995)',
 'Untitled Spider-Man Reboot (2017)',
 'Deadpool (2016)',
 'Big Hero 6 (2014)',
 'Toy Story 2 (1999)',
 'Thor: Ragnarok (2017)',
 'Groundhog Day (1993)',
 'Forrest Gump (1994)',
 'Easy A (2010)',
 'Back to the Future Part II (1989)',
 'Scott Pilgrim vs. the World (2010)',
 'Austin Powers: International Man of Mystery (1997)',
 'Wonder Woman (2017)',
 'How to Train Your Dragon (2010)',
 '21 Jump Street (2012)',
 'Beauty and the Beast (1991)',
 'Monsters, Inc. (2001)']

### Evaluation metrics

In [103]:
from sklearn import preprocessing
random_userId = random.choice(list(set(data.userId)))

In [107]:
user_input = list(data[(data.userId == random_userId) & (data.rating == 5)]['movieId'])
user_input = [idx_to_movie[a] for a in user_input if a in idx_to_movie]
top_movie, test = predict(user_input, model, movie_to_idx, idx_to_movie)
user_input

['Shoot to Kill',
 'Mondo Cane',
 'Morrissey: Oye Esteban!',
 '8 Seconds',
 'Men in Black',
 'Friday Foster',
 'SpongeBob SquarePants: Sea Stories',
 'The Twilight Zone: Vol. 43',
 'I See a Dark Stranger',
 "It's a Wonderful Life",
 'The Blonde',
 'Alive',
 'Twister',
 'Ozzy Osbourne: Live & Loud',
 'Chaplin: The Collection: Vol. 1',
 'Vampire Hunter D',
 'The Patriot',
 'Forrest Gump',
 'The Gambler V: Playing for Keeps',
 'Babe: Pig in the City',
 'The Return',
 'Charlie Chan: The Secret Service',
 'The Map of Sex and Love',
 'Adventures of Batman & Robin: The Joker/Fire & Ice',
 'SD Gundam Force',
 'My Louisiana Sky',
 'Scarface: 20th Anniversary Edition',
 'The Man Who Knew Too Little',
 'The Judy Garland Show',
 'Babylon 5: Season 4',
 "Sing Faster: The Stagehands' Ring Cycle",
 'Lexx: Series 3',
 'Secret Agent A.K.A. Danger Man',
 'The Alien Saga',
 'Life Is Beautiful',
 'Red Dwarf: Series 4: Bonus Material',
 'Trailer: The Movie',
 'The Accused',
 'The Final Hit',
 'The King of 

In [108]:
test

[1.366352,
 1.3386887,
 1.334954,
 1.3138496,
 1.3131086,
 1.3066211,
 1.2845986,
 1.2790482,
 1.2625632,
 1.2477815,
 1.2407762,
 1.2343926,
 1.2341886,
 1.2290834,
 1.2199923,
 1.21152,
 1.208349,
 1.2083226,
 1.2001097,
 1.1992903,
 1.1773345,
 1.1529144,
 1.1520725,
 1.145341,
 1.1399326,
 1.133071,
 1.1300371,
 1.1141818,
 1.1119843,
 1.0803291]

In [59]:
top_movie

['Cirque du Soleil: Alegria',
 'The Blue Planet: Seas of Life: Seasonal Seas - Coral Seas',
 'Horatio Hornblower: The New Adventures',
 'Monk: Season 1',
 'North & South: The Complete Collection',
 'Quantum Leap: Season 1',
 'Xena: Warrior Princess: Season 2',
 'Horatio Hornblower',
 'The Thin Blue Line: The Complete Lineup',
 'Dead Like Me: Season 1',
 'Jeeves and Wooster: Season 1',
 'CSI: Miami: Season 1',
 'Whales: An Unforgettable Journey: IMAX',
 "Foyle's War: Series 1",
 'The Blue Planet: Seas of Life: Tidal Seas - Coasts',
 'Keeping Up Appearances',
 'Dark Shadows: Vol. 5',
 'The Day After Tomorrow',
 'Cirque du Soleil: Journey of Man: IMAX',
 'Ring of Fire: IMAX',
 'The Name of the Rose',
 'Africa: The Serengeti: IMAX',
 'Queen: Live at Wembley Stadium',
 'The Eruption of Mount St. Helens: IMAX',
 'Red Dwarf: Series 2',
 'Alaska: Spirit of the Wild: IMAX',
 'Xena: Warrior Princess: Season 1',
 'Roswell: Season 1',
 'Red Dwarf: Series 4',
 'Buck Rogers in the 25th Century: The 

In [31]:
for movie in user_input:
movies[(movies.movieId==14621)]['title']

14348    Shrek (Full-screen)
Name: title, dtype: object